blob: e75ab94e10f788a372570689dd4d6ff017a81c53 [file] [log] [blame]
Jeremy Hylton3e0055f2005-10-20 19:59:25 +00001#include "Python.h"
2#include "code.h"
3#include "structmember.h"
4
#define NAME_CHARS \
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"

/* all_name_chars(s): true iff all chars in s are valid NAME_CHARS.

   s is a NUL-terminated byte string; scanning stops at the first NUL,
   so an empty string yields 1.  The string is only read, hence the
   const qualifier.  Returns 1 if every byte before the terminator
   appears in NAME_CHARS, 0 otherwise. */

static int
all_name_chars(const unsigned char *s)
{
    /* Lookup table indexed by byte value; nonzero means "allowed". */
    static char ok_name_char[256];
    static const unsigned char *name_chars =
        (const unsigned char *)NAME_CHARS;

    /* The table is filled lazily: the entry for the first character of
       NAME_CHARS is still 0 only if the table has never been built. */
    if (ok_name_char[*name_chars] == 0) {
        const unsigned char *p;
        for (p = name_chars; *p; p++)
            ok_name_char[*p] = 1;
    }
    while (*s) {
        if (ok_name_char[*s++] == 0)
            return 0;
    }
    return 1;
}
27
28static void
29intern_strings(PyObject *tuple)
30{
Martin v. Löwis18e16552006-02-15 17:27:45 +000031 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000032
33 for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
34 PyObject *v = PyTuple_GET_ITEM(tuple, i);
35 if (v == NULL || !PyString_CheckExact(v)) {
36 Py_FatalError("non-string found in code slot");
37 }
38 PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
39 }
40}
41
42
43PyCodeObject *
44PyCode_New(int argcount, int nlocals, int stacksize, int flags,
45 PyObject *code, PyObject *consts, PyObject *names,
46 PyObject *varnames, PyObject *freevars, PyObject *cellvars,
47 PyObject *filename, PyObject *name, int firstlineno,
48 PyObject *lnotab)
49{
50 PyCodeObject *co;
Martin v. Löwis18e16552006-02-15 17:27:45 +000051 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000052 /* Check argument types */
53 if (argcount < 0 || nlocals < 0 ||
54 code == NULL ||
55 consts == NULL || !PyTuple_Check(consts) ||
56 names == NULL || !PyTuple_Check(names) ||
57 varnames == NULL || !PyTuple_Check(varnames) ||
58 freevars == NULL || !PyTuple_Check(freevars) ||
59 cellvars == NULL || !PyTuple_Check(cellvars) ||
60 name == NULL || !PyString_Check(name) ||
61 filename == NULL || !PyString_Check(filename) ||
62 lnotab == NULL || !PyString_Check(lnotab) ||
63 !PyObject_CheckReadBuffer(code)) {
64 PyErr_BadInternalCall();
65 return NULL;
66 }
67 intern_strings(names);
68 intern_strings(varnames);
69 intern_strings(freevars);
70 intern_strings(cellvars);
71 /* Intern selected string constants */
72 for (i = PyTuple_Size(consts); --i >= 0; ) {
73 PyObject *v = PyTuple_GetItem(consts, i);
74 if (!PyString_Check(v))
75 continue;
76 if (!all_name_chars((unsigned char *)PyString_AS_STRING(v)))
77 continue;
78 PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i));
79 }
80 co = PyObject_NEW(PyCodeObject, &PyCode_Type);
81 if (co != NULL) {
82 co->co_argcount = argcount;
83 co->co_nlocals = nlocals;
84 co->co_stacksize = stacksize;
85 co->co_flags = flags;
86 Py_INCREF(code);
87 co->co_code = code;
88 Py_INCREF(consts);
89 co->co_consts = consts;
90 Py_INCREF(names);
91 co->co_names = names;
92 Py_INCREF(varnames);
93 co->co_varnames = varnames;
94 Py_INCREF(freevars);
95 co->co_freevars = freevars;
96 Py_INCREF(cellvars);
97 co->co_cellvars = cellvars;
98 Py_INCREF(filename);
99 co->co_filename = filename;
100 Py_INCREF(name);
101 co->co_name = name;
102 co->co_firstlineno = firstlineno;
103 Py_INCREF(lnotab);
104 co->co_lnotab = lnotab;
Richard Jones7c88dcc2006-05-23 10:37:38 +0000105 co->co_zombieframe = NULL;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000106 }
107 return co;
108}
109
110
111#define OFF(x) offsetof(PyCodeObject, x)
112
113static PyMemberDef code_memberlist[] = {
114 {"co_argcount", T_INT, OFF(co_argcount), READONLY},
115 {"co_nlocals", T_INT, OFF(co_nlocals), READONLY},
116 {"co_stacksize",T_INT, OFF(co_stacksize), READONLY},
117 {"co_flags", T_INT, OFF(co_flags), READONLY},
118 {"co_code", T_OBJECT, OFF(co_code), READONLY},
119 {"co_consts", T_OBJECT, OFF(co_consts), READONLY},
120 {"co_names", T_OBJECT, OFF(co_names), READONLY},
121 {"co_varnames", T_OBJECT, OFF(co_varnames), READONLY},
122 {"co_freevars", T_OBJECT, OFF(co_freevars), READONLY},
123 {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY},
124 {"co_filename", T_OBJECT, OFF(co_filename), READONLY},
125 {"co_name", T_OBJECT, OFF(co_name), READONLY},
126 {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
127 {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY},
128 {NULL} /* Sentinel */
129};
130
131/* Helper for code_new: return a shallow copy of a tuple that is
132 guaranteed to contain exact strings, by converting string subclasses
133 to exact strings and complaining if a non-string is found. */
134static PyObject*
135validate_and_copy_tuple(PyObject *tup)
136{
137 PyObject *newtuple;
138 PyObject *item;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000139 Py_ssize_t i, len;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000140
141 len = PyTuple_GET_SIZE(tup);
142 newtuple = PyTuple_New(len);
143 if (newtuple == NULL)
144 return NULL;
145
146 for (i = 0; i < len; i++) {
147 item = PyTuple_GET_ITEM(tup, i);
148 if (PyString_CheckExact(item)) {
149 Py_INCREF(item);
150 }
151 else if (!PyString_Check(item)) {
152 PyErr_Format(
153 PyExc_TypeError,
154 "name tuples must contain only "
155 "strings, not '%.500s'",
156 item->ob_type->tp_name);
157 Py_DECREF(newtuple);
158 return NULL;
159 }
160 else {
161 item = PyString_FromStringAndSize(
162 PyString_AS_STRING(item),
163 PyString_GET_SIZE(item));
164 if (item == NULL) {
165 Py_DECREF(newtuple);
166 return NULL;
167 }
168 }
169 PyTuple_SET_ITEM(newtuple, i, item);
170 }
171
172 return newtuple;
173}
174
175PyDoc_STRVAR(code_doc,
176"code(argcount, nlocals, stacksize, flags, codestring, constants, names,\n\
177 varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]])\n\
178\n\
179Create a code object. Not for the faint of heart.");
180
181static PyObject *
182code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
183{
184 int argcount;
185 int nlocals;
186 int stacksize;
187 int flags;
188 PyObject *co = NULL;
189 PyObject *code;
190 PyObject *consts;
191 PyObject *names, *ournames = NULL;
192 PyObject *varnames, *ourvarnames = NULL;
193 PyObject *freevars = NULL, *ourfreevars = NULL;
194 PyObject *cellvars = NULL, *ourcellvars = NULL;
195 PyObject *filename;
196 PyObject *name;
197 int firstlineno;
198 PyObject *lnotab;
199
200 if (!PyArg_ParseTuple(args, "iiiiSO!O!O!SSiS|O!O!:code",
201 &argcount, &nlocals, &stacksize, &flags,
202 &code,
203 &PyTuple_Type, &consts,
204 &PyTuple_Type, &names,
205 &PyTuple_Type, &varnames,
206 &filename, &name,
207 &firstlineno, &lnotab,
208 &PyTuple_Type, &freevars,
209 &PyTuple_Type, &cellvars))
210 return NULL;
211
212 if (argcount < 0) {
213 PyErr_SetString(
214 PyExc_ValueError,
215 "code: argcount must not be negative");
216 goto cleanup;
217 }
218
219 if (nlocals < 0) {
220 PyErr_SetString(
221 PyExc_ValueError,
222 "code: nlocals must not be negative");
223 goto cleanup;
224 }
225
226 ournames = validate_and_copy_tuple(names);
227 if (ournames == NULL)
228 goto cleanup;
229 ourvarnames = validate_and_copy_tuple(varnames);
230 if (ourvarnames == NULL)
231 goto cleanup;
232 if (freevars)
233 ourfreevars = validate_and_copy_tuple(freevars);
234 else
235 ourfreevars = PyTuple_New(0);
236 if (ourfreevars == NULL)
237 goto cleanup;
238 if (cellvars)
239 ourcellvars = validate_and_copy_tuple(cellvars);
240 else
241 ourcellvars = PyTuple_New(0);
242 if (ourcellvars == NULL)
243 goto cleanup;
244
245 co = (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags,
246 code, consts, ournames, ourvarnames,
247 ourfreevars, ourcellvars, filename,
248 name, firstlineno, lnotab);
249 cleanup:
250 Py_XDECREF(ournames);
251 Py_XDECREF(ourvarnames);
252 Py_XDECREF(ourfreevars);
253 Py_XDECREF(ourcellvars);
254 return co;
255}
256
257static void
258code_dealloc(PyCodeObject *co)
259{
260 Py_XDECREF(co->co_code);
261 Py_XDECREF(co->co_consts);
262 Py_XDECREF(co->co_names);
263 Py_XDECREF(co->co_varnames);
264 Py_XDECREF(co->co_freevars);
265 Py_XDECREF(co->co_cellvars);
266 Py_XDECREF(co->co_filename);
267 Py_XDECREF(co->co_name);
268 Py_XDECREF(co->co_lnotab);
Richard Jones7c88dcc2006-05-23 10:37:38 +0000269 if (co->co_zombieframe != NULL)
270 PyObject_GC_Del(co->co_zombieframe);
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000271 PyObject_DEL(co);
272}
273
274static PyObject *
275code_repr(PyCodeObject *co)
276{
277 char buf[500];
278 int lineno = -1;
279 char *filename = "???";
280 char *name = "???";
281
282 if (co->co_firstlineno != 0)
283 lineno = co->co_firstlineno;
284 if (co->co_filename && PyString_Check(co->co_filename))
285 filename = PyString_AS_STRING(co->co_filename);
286 if (co->co_name && PyString_Check(co->co_name))
287 name = PyString_AS_STRING(co->co_name);
288 PyOS_snprintf(buf, sizeof(buf),
289 "<code object %.100s at %p, file \"%.300s\", line %d>",
290 name, co, filename, lineno);
291 return PyString_FromString(buf);
292}
293
294static int
295code_compare(PyCodeObject *co, PyCodeObject *cp)
296{
297 int cmp;
298 cmp = PyObject_Compare(co->co_name, cp->co_name);
299 if (cmp) return cmp;
300 cmp = co->co_argcount - cp->co_argcount;
301 if (cmp) goto normalize;
302 cmp = co->co_nlocals - cp->co_nlocals;
303 if (cmp) goto normalize;
304 cmp = co->co_flags - cp->co_flags;
305 if (cmp) goto normalize;
306 cmp = co->co_firstlineno - cp->co_firstlineno;
307 if (cmp) goto normalize;
308 cmp = PyObject_Compare(co->co_code, cp->co_code);
309 if (cmp) return cmp;
310 cmp = PyObject_Compare(co->co_consts, cp->co_consts);
311 if (cmp) return cmp;
312 cmp = PyObject_Compare(co->co_names, cp->co_names);
313 if (cmp) return cmp;
314 cmp = PyObject_Compare(co->co_varnames, cp->co_varnames);
315 if (cmp) return cmp;
316 cmp = PyObject_Compare(co->co_freevars, cp->co_freevars);
317 if (cmp) return cmp;
318 cmp = PyObject_Compare(co->co_cellvars, cp->co_cellvars);
319 return cmp;
320
321 normalize:
322 if (cmp > 0)
323 return 1;
324 else if (cmp < 0)
325 return -1;
326 else
327 return 0;
328}
329
330static long
331code_hash(PyCodeObject *co)
332{
333 long h, h0, h1, h2, h3, h4, h5, h6;
334 h0 = PyObject_Hash(co->co_name);
335 if (h0 == -1) return -1;
336 h1 = PyObject_Hash(co->co_code);
337 if (h1 == -1) return -1;
338 h2 = PyObject_Hash(co->co_consts);
339 if (h2 == -1) return -1;
340 h3 = PyObject_Hash(co->co_names);
341 if (h3 == -1) return -1;
342 h4 = PyObject_Hash(co->co_varnames);
343 if (h4 == -1) return -1;
344 h5 = PyObject_Hash(co->co_freevars);
345 if (h5 == -1) return -1;
346 h6 = PyObject_Hash(co->co_cellvars);
347 if (h6 == -1) return -1;
348 h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^
349 co->co_argcount ^ co->co_nlocals ^ co->co_flags;
350 if (h == -1) h = -2;
351 return h;
352}
353
354/* XXX code objects need to participate in GC? */
355
356PyTypeObject PyCode_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +0000357 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000358 "code",
359 sizeof(PyCodeObject),
360 0,
361 (destructor)code_dealloc, /* tp_dealloc */
362 0, /* tp_print */
363 0, /* tp_getattr */
364 0, /* tp_setattr */
365 (cmpfunc)code_compare, /* tp_compare */
366 (reprfunc)code_repr, /* tp_repr */
367 0, /* tp_as_number */
368 0, /* tp_as_sequence */
369 0, /* tp_as_mapping */
370 (hashfunc)code_hash, /* tp_hash */
371 0, /* tp_call */
372 0, /* tp_str */
373 PyObject_GenericGetAttr, /* tp_getattro */
374 0, /* tp_setattro */
375 0, /* tp_as_buffer */
376 Py_TPFLAGS_DEFAULT, /* tp_flags */
377 code_doc, /* tp_doc */
378 0, /* tp_traverse */
379 0, /* tp_clear */
380 0, /* tp_richcompare */
381 0, /* tp_weaklistoffset */
382 0, /* tp_iter */
383 0, /* tp_iternext */
384 0, /* tp_methods */
385 code_memberlist, /* tp_members */
386 0, /* tp_getset */
387 0, /* tp_base */
388 0, /* tp_dict */
389 0, /* tp_descr_get */
390 0, /* tp_descr_set */
391 0, /* tp_dictoffset */
392 0, /* tp_init */
393 0, /* tp_alloc */
394 code_new, /* tp_new */
395};
396
/* All about co_lnotab.

co_lnotab is an array of unsigned bytes disguised as a Python string.  In -O
mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped
to source code line #s (when needed for tracebacks) via co_lnotab instead.
The array is conceptually a list of
    (bytecode offset increment, line number increment)
pairs.  The details are important and delicate, best illustrated by example:

    byte code offset    source code line number
        0                   1
        6                   2
       50                   7
      350                 307
      361                 308

The first trick is that these numbers aren't stored, only the increments
from one row to the next (this doesn't really work, but it's a start):

    0, 1,  6, 1,  44, 5,  300, 300,  11, 1

The second trick is that an unsigned byte can't hold negative values, or
values larger than 255, so (a) there's a deep assumption that byte code
offsets and their corresponding line #s both increase monotonically, and (b)
if at least one column jumps by more than 255 from one row to the next, more
than one pair is written to the table.  In case #b, there's no way to know
from looking at the table later how many were written.  That's the delicate
part.  A user of co_lnotab desiring to find the source line number
corresponding to a bytecode address A should do something like this:

    lineno = addr = 0
    for addr_incr, line_incr in co_lnotab:
        addr += addr_incr
        if addr > A:
            return lineno
        lineno += line_incr

In order for this to work, when the addr field increments by more than 255,
the line # increment in each pair generated must be 0 until the remaining addr
increment is < 256.  So, in the example above, com_set_lineno should not (as
was actually done until 2.2) expand 300, 300 to 255, 255,  45, 45, but to
255, 0,  45, 255,  0, 45.
*/
440
441int
442PyCode_Addr2Line(PyCodeObject *co, int addrq)
443{
444 int size = PyString_Size(co->co_lnotab) / 2;
445 unsigned char *p = (unsigned char*)PyString_AsString(co->co_lnotab);
446 int line = co->co_firstlineno;
447 int addr = 0;
448 while (--size >= 0) {
449 addr += *p++;
450 if (addr > addrq)
451 break;
452 line += *p++;
453 }
454 return line;
455}
Jeremy Hyltona4ebc132006-04-18 14:47:00 +0000456
/*
   Check whether the current instruction is at the start of a line.

 */

    /* The theory of SET_LINENO-less tracing.

       In a nutshell, we use the co_lnotab field of the code object
       to tell when execution has moved onto a different line.

       As mentioned above, the basic idea is to set things up so
       that

             *instr_lb <= frame->f_lasti < *instr_ub

       is true so long as execution does not change lines.

       This is all fairly simple.  Digging the information out of
       co_lnotab takes some work, but is conceptually clear.

       Somewhat harder to explain is why we don't *always* call the
       line trace function when the above test fails.

       Consider this code:

       1: def f(a):
       2:     if a:
       3:        print 1
       4:     else:
       5:        print 2

       which compiles to this:

       2           0 LOAD_FAST                0 (a)
                   3 JUMP_IF_FALSE            9 (to 15)
                   6 POP_TOP

       3           7 LOAD_CONST               1 (1)
                  10 PRINT_ITEM
                  11 PRINT_NEWLINE
                  12 JUMP_FORWARD             6 (to 21)
             >>   15 POP_TOP

       5          16 LOAD_CONST               2 (2)
                  19 PRINT_ITEM
                  20 PRINT_NEWLINE
             >>   21 LOAD_CONST               0 (None)
                  24 RETURN_VALUE

       If 'a' is false, execution will jump to instruction at offset
       15 and the co_lnotab will claim that execution has moved to
       line 3.  This is at best misleading.  In this case we could
       associate the POP_TOP with line 4, but that doesn't make
       sense in all cases (I think).

       What we do is only call the line trace function if the co_lnotab
       indicates we have jumped to the *start* of a line, i.e. if the
       current instruction offset matches the offset given for the
       start of a line by the co_lnotab.

       This also takes care of the situation where 'a' is true.
       Execution will jump from instruction offset 12 to offset 21.
       Then the co_lnotab would imply that execution has moved to line
       5, which is again misleading.

       Why do we set f_lineno when tracing?  Well, consider the code
       above when 'a' is true.  If stepping through this with 'n' in
       pdb, you would stop at line 1 with a "call" type event, then
       line events on lines 2 and 3, then a "return" type event -- but
       you would be shown line 5 during this event.  This is a change
       from the behaviour in 2.2 and before, and I've found it
       confusing in practice.  By setting and using f_lineno when
       tracing, one can report a line number different from that
       suggested by f_lasti on this one occasion where it's desirable.
    */
532
533
534int
535PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
536{
537 int size, addr, line;
538 unsigned char* p;
539
540 p = (unsigned char*)PyString_AS_STRING(co->co_lnotab);
541 size = PyString_GET_SIZE(co->co_lnotab) / 2;
542
543 addr = 0;
544 line = co->co_firstlineno;
545 assert(line > 0);
546
547 /* possible optimization: if f->f_lasti == instr_ub
548 (likely to be a common case) then we already know
549 instr_lb -- if we stored the matching value of p
550 somwhere we could skip the first while loop. */
551
552 /* see comments in compile.c for the description of
553 co_lnotab. A point to remember: increments to p
554 should come in pairs -- although we don't care about
555 the line increments here, treating them as byte
556 increments gets confusing, to say the least. */
557
Neal Norwitz7e49c6e2006-07-12 05:27:46 +0000558 bounds->ap_lower = 0;
Jeremy Hyltona4ebc132006-04-18 14:47:00 +0000559 while (size > 0) {
560 if (addr + *p > lasti)
561 break;
562 addr += *p++;
563 if (*p)
564 bounds->ap_lower = addr;
565 line += *p++;
566 --size;
567 }
568
569 /* If lasti and addr don't match exactly, we don't want to
570 change the lineno slot on the frame or execute a trace
571 function. Return -1 instead.
572 */
573 if (addr != lasti)
574 line = -1;
575
576 if (size > 0) {
577 while (--size >= 0) {
578 addr += *p++;
579 if (*p++)
580 break;
581 }
582 bounds->ap_upper = addr;
583 }
584 else {
585 bounds->ap_upper = INT_MAX;
586 }
587
588 return line;
589}