blob: 8ae23994b67c80ff1611a178cd7b4897b594048c [file] [log] [blame]
Jeremy Hylton3e0055f2005-10-20 19:59:25 +00001#include "Python.h"
2#include "code.h"
3#include "structmember.h"
4
#define NAME_CHARS \
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"

/* all_name_chars(s): return 1 if every byte of the NUL-terminated string s
   appears in NAME_CHARS, otherwise 0.  An empty string yields 1. */

static int
all_name_chars(unsigned char *s)
{
    /* Lookup table indexed by byte value; built lazily on the first call. */
    static char is_name_byte[256];
    static unsigned char *alphabet = (unsigned char *)NAME_CHARS;
    unsigned char *cursor;

    /* The slot for the first alphabet character doubles as the
       "table already built" flag. */
    if (!is_name_byte[alphabet[0]]) {
        for (cursor = alphabet; *cursor != '\0'; cursor++)
            is_name_byte[*cursor] = 1;
    }

    /* Stop at the first byte that is not in the table. */
    for (cursor = s; *cursor != '\0'; cursor++) {
        if (!is_name_byte[*cursor])
            return 0;
    }
    return 1;
}
27
28static void
29intern_strings(PyObject *tuple)
30{
Martin v. Löwis18e16552006-02-15 17:27:45 +000031 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000032
33 for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
34 PyObject *v = PyTuple_GET_ITEM(tuple, i);
35 if (v == NULL || !PyString_CheckExact(v)) {
36 Py_FatalError("non-string found in code slot");
37 }
38 PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
39 }
40}
41
42
43PyCodeObject *
44PyCode_New(int argcount, int nlocals, int stacksize, int flags,
45 PyObject *code, PyObject *consts, PyObject *names,
46 PyObject *varnames, PyObject *freevars, PyObject *cellvars,
47 PyObject *filename, PyObject *name, int firstlineno,
48 PyObject *lnotab)
49{
50 PyCodeObject *co;
Martin v. Löwis18e16552006-02-15 17:27:45 +000051 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000052 /* Check argument types */
53 if (argcount < 0 || nlocals < 0 ||
54 code == NULL ||
55 consts == NULL || !PyTuple_Check(consts) ||
56 names == NULL || !PyTuple_Check(names) ||
57 varnames == NULL || !PyTuple_Check(varnames) ||
58 freevars == NULL || !PyTuple_Check(freevars) ||
59 cellvars == NULL || !PyTuple_Check(cellvars) ||
60 name == NULL || !PyString_Check(name) ||
61 filename == NULL || !PyString_Check(filename) ||
62 lnotab == NULL || !PyString_Check(lnotab) ||
63 !PyObject_CheckReadBuffer(code)) {
64 PyErr_BadInternalCall();
65 return NULL;
66 }
67 intern_strings(names);
68 intern_strings(varnames);
69 intern_strings(freevars);
70 intern_strings(cellvars);
71 /* Intern selected string constants */
72 for (i = PyTuple_Size(consts); --i >= 0; ) {
73 PyObject *v = PyTuple_GetItem(consts, i);
74 if (!PyString_Check(v))
75 continue;
76 if (!all_name_chars((unsigned char *)PyString_AS_STRING(v)))
77 continue;
78 PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i));
79 }
80 co = PyObject_NEW(PyCodeObject, &PyCode_Type);
81 if (co != NULL) {
82 co->co_argcount = argcount;
83 co->co_nlocals = nlocals;
84 co->co_stacksize = stacksize;
85 co->co_flags = flags;
86 Py_INCREF(code);
87 co->co_code = code;
88 Py_INCREF(consts);
89 co->co_consts = consts;
90 Py_INCREF(names);
91 co->co_names = names;
92 Py_INCREF(varnames);
93 co->co_varnames = varnames;
94 Py_INCREF(freevars);
95 co->co_freevars = freevars;
96 Py_INCREF(cellvars);
97 co->co_cellvars = cellvars;
98 Py_INCREF(filename);
99 co->co_filename = filename;
100 Py_INCREF(name);
101 co->co_name = name;
102 co->co_firstlineno = firstlineno;
103 Py_INCREF(lnotab);
104 co->co_lnotab = lnotab;
105 }
106 return co;
107}
108
109
110#define OFF(x) offsetof(PyCodeObject, x)
111
112static PyMemberDef code_memberlist[] = {
113 {"co_argcount", T_INT, OFF(co_argcount), READONLY},
114 {"co_nlocals", T_INT, OFF(co_nlocals), READONLY},
115 {"co_stacksize",T_INT, OFF(co_stacksize), READONLY},
116 {"co_flags", T_INT, OFF(co_flags), READONLY},
117 {"co_code", T_OBJECT, OFF(co_code), READONLY},
118 {"co_consts", T_OBJECT, OFF(co_consts), READONLY},
119 {"co_names", T_OBJECT, OFF(co_names), READONLY},
120 {"co_varnames", T_OBJECT, OFF(co_varnames), READONLY},
121 {"co_freevars", T_OBJECT, OFF(co_freevars), READONLY},
122 {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY},
123 {"co_filename", T_OBJECT, OFF(co_filename), READONLY},
124 {"co_name", T_OBJECT, OFF(co_name), READONLY},
125 {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
126 {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY},
127 {NULL} /* Sentinel */
128};
129
130/* Helper for code_new: return a shallow copy of a tuple that is
131 guaranteed to contain exact strings, by converting string subclasses
132 to exact strings and complaining if a non-string is found. */
133static PyObject*
134validate_and_copy_tuple(PyObject *tup)
135{
136 PyObject *newtuple;
137 PyObject *item;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000138 Py_ssize_t i, len;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000139
140 len = PyTuple_GET_SIZE(tup);
141 newtuple = PyTuple_New(len);
142 if (newtuple == NULL)
143 return NULL;
144
145 for (i = 0; i < len; i++) {
146 item = PyTuple_GET_ITEM(tup, i);
147 if (PyString_CheckExact(item)) {
148 Py_INCREF(item);
149 }
150 else if (!PyString_Check(item)) {
151 PyErr_Format(
152 PyExc_TypeError,
153 "name tuples must contain only "
154 "strings, not '%.500s'",
155 item->ob_type->tp_name);
156 Py_DECREF(newtuple);
157 return NULL;
158 }
159 else {
160 item = PyString_FromStringAndSize(
161 PyString_AS_STRING(item),
162 PyString_GET_SIZE(item));
163 if (item == NULL) {
164 Py_DECREF(newtuple);
165 return NULL;
166 }
167 }
168 PyTuple_SET_ITEM(newtuple, i, item);
169 }
170
171 return newtuple;
172}
173
174PyDoc_STRVAR(code_doc,
175"code(argcount, nlocals, stacksize, flags, codestring, constants, names,\n\
176 varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]])\n\
177\n\
178Create a code object. Not for the faint of heart.");
179
180static PyObject *
181code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
182{
183 int argcount;
184 int nlocals;
185 int stacksize;
186 int flags;
187 PyObject *co = NULL;
188 PyObject *code;
189 PyObject *consts;
190 PyObject *names, *ournames = NULL;
191 PyObject *varnames, *ourvarnames = NULL;
192 PyObject *freevars = NULL, *ourfreevars = NULL;
193 PyObject *cellvars = NULL, *ourcellvars = NULL;
194 PyObject *filename;
195 PyObject *name;
196 int firstlineno;
197 PyObject *lnotab;
198
199 if (!PyArg_ParseTuple(args, "iiiiSO!O!O!SSiS|O!O!:code",
200 &argcount, &nlocals, &stacksize, &flags,
201 &code,
202 &PyTuple_Type, &consts,
203 &PyTuple_Type, &names,
204 &PyTuple_Type, &varnames,
205 &filename, &name,
206 &firstlineno, &lnotab,
207 &PyTuple_Type, &freevars,
208 &PyTuple_Type, &cellvars))
209 return NULL;
210
211 if (argcount < 0) {
212 PyErr_SetString(
213 PyExc_ValueError,
214 "code: argcount must not be negative");
215 goto cleanup;
216 }
217
218 if (nlocals < 0) {
219 PyErr_SetString(
220 PyExc_ValueError,
221 "code: nlocals must not be negative");
222 goto cleanup;
223 }
224
225 ournames = validate_and_copy_tuple(names);
226 if (ournames == NULL)
227 goto cleanup;
228 ourvarnames = validate_and_copy_tuple(varnames);
229 if (ourvarnames == NULL)
230 goto cleanup;
231 if (freevars)
232 ourfreevars = validate_and_copy_tuple(freevars);
233 else
234 ourfreevars = PyTuple_New(0);
235 if (ourfreevars == NULL)
236 goto cleanup;
237 if (cellvars)
238 ourcellvars = validate_and_copy_tuple(cellvars);
239 else
240 ourcellvars = PyTuple_New(0);
241 if (ourcellvars == NULL)
242 goto cleanup;
243
244 co = (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags,
245 code, consts, ournames, ourvarnames,
246 ourfreevars, ourcellvars, filename,
247 name, firstlineno, lnotab);
248 cleanup:
249 Py_XDECREF(ournames);
250 Py_XDECREF(ourvarnames);
251 Py_XDECREF(ourfreevars);
252 Py_XDECREF(ourcellvars);
253 return co;
254}
255
256static void
257code_dealloc(PyCodeObject *co)
258{
259 Py_XDECREF(co->co_code);
260 Py_XDECREF(co->co_consts);
261 Py_XDECREF(co->co_names);
262 Py_XDECREF(co->co_varnames);
263 Py_XDECREF(co->co_freevars);
264 Py_XDECREF(co->co_cellvars);
265 Py_XDECREF(co->co_filename);
266 Py_XDECREF(co->co_name);
267 Py_XDECREF(co->co_lnotab);
268 PyObject_DEL(co);
269}
270
271static PyObject *
272code_repr(PyCodeObject *co)
273{
274 char buf[500];
275 int lineno = -1;
276 char *filename = "???";
277 char *name = "???";
278
279 if (co->co_firstlineno != 0)
280 lineno = co->co_firstlineno;
281 if (co->co_filename && PyString_Check(co->co_filename))
282 filename = PyString_AS_STRING(co->co_filename);
283 if (co->co_name && PyString_Check(co->co_name))
284 name = PyString_AS_STRING(co->co_name);
285 PyOS_snprintf(buf, sizeof(buf),
286 "<code object %.100s at %p, file \"%.300s\", line %d>",
287 name, co, filename, lineno);
288 return PyString_FromString(buf);
289}
290
291static int
292code_compare(PyCodeObject *co, PyCodeObject *cp)
293{
294 int cmp;
295 cmp = PyObject_Compare(co->co_name, cp->co_name);
296 if (cmp) return cmp;
297 cmp = co->co_argcount - cp->co_argcount;
298 if (cmp) goto normalize;
299 cmp = co->co_nlocals - cp->co_nlocals;
300 if (cmp) goto normalize;
301 cmp = co->co_flags - cp->co_flags;
302 if (cmp) goto normalize;
303 cmp = co->co_firstlineno - cp->co_firstlineno;
304 if (cmp) goto normalize;
305 cmp = PyObject_Compare(co->co_code, cp->co_code);
306 if (cmp) return cmp;
307 cmp = PyObject_Compare(co->co_consts, cp->co_consts);
308 if (cmp) return cmp;
309 cmp = PyObject_Compare(co->co_names, cp->co_names);
310 if (cmp) return cmp;
311 cmp = PyObject_Compare(co->co_varnames, cp->co_varnames);
312 if (cmp) return cmp;
313 cmp = PyObject_Compare(co->co_freevars, cp->co_freevars);
314 if (cmp) return cmp;
315 cmp = PyObject_Compare(co->co_cellvars, cp->co_cellvars);
316 return cmp;
317
318 normalize:
319 if (cmp > 0)
320 return 1;
321 else if (cmp < 0)
322 return -1;
323 else
324 return 0;
325}
326
327static long
328code_hash(PyCodeObject *co)
329{
330 long h, h0, h1, h2, h3, h4, h5, h6;
331 h0 = PyObject_Hash(co->co_name);
332 if (h0 == -1) return -1;
333 h1 = PyObject_Hash(co->co_code);
334 if (h1 == -1) return -1;
335 h2 = PyObject_Hash(co->co_consts);
336 if (h2 == -1) return -1;
337 h3 = PyObject_Hash(co->co_names);
338 if (h3 == -1) return -1;
339 h4 = PyObject_Hash(co->co_varnames);
340 if (h4 == -1) return -1;
341 h5 = PyObject_Hash(co->co_freevars);
342 if (h5 == -1) return -1;
343 h6 = PyObject_Hash(co->co_cellvars);
344 if (h6 == -1) return -1;
345 h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^
346 co->co_argcount ^ co->co_nlocals ^ co->co_flags;
347 if (h == -1) h = -2;
348 return h;
349}
350
351/* XXX code objects need to participate in GC? */
352
353PyTypeObject PyCode_Type = {
354 PyObject_HEAD_INIT(&PyType_Type)
355 0,
356 "code",
357 sizeof(PyCodeObject),
358 0,
359 (destructor)code_dealloc, /* tp_dealloc */
360 0, /* tp_print */
361 0, /* tp_getattr */
362 0, /* tp_setattr */
363 (cmpfunc)code_compare, /* tp_compare */
364 (reprfunc)code_repr, /* tp_repr */
365 0, /* tp_as_number */
366 0, /* tp_as_sequence */
367 0, /* tp_as_mapping */
368 (hashfunc)code_hash, /* tp_hash */
369 0, /* tp_call */
370 0, /* tp_str */
371 PyObject_GenericGetAttr, /* tp_getattro */
372 0, /* tp_setattro */
373 0, /* tp_as_buffer */
374 Py_TPFLAGS_DEFAULT, /* tp_flags */
375 code_doc, /* tp_doc */
376 0, /* tp_traverse */
377 0, /* tp_clear */
378 0, /* tp_richcompare */
379 0, /* tp_weaklistoffset */
380 0, /* tp_iter */
381 0, /* tp_iternext */
382 0, /* tp_methods */
383 code_memberlist, /* tp_members */
384 0, /* tp_getset */
385 0, /* tp_base */
386 0, /* tp_dict */
387 0, /* tp_descr_get */
388 0, /* tp_descr_set */
389 0, /* tp_dictoffset */
390 0, /* tp_init */
391 0, /* tp_alloc */
392 code_new, /* tp_new */
393};
394
395/* All about c_lnotab.
396
397c_lnotab is an array of unsigned bytes disguised as a Python string. In -O
398mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped
399to source code line #s (when needed for tracebacks) via c_lnotab instead.
400The array is conceptually a list of
401 (bytecode offset increment, line number increment)
402pairs. The details are important and delicate, best illustrated by example:
403
404 byte code offset source code line number
405 0 1
406 6 2
407 50 7
408 350 307
409 361 308
410
411The first trick is that these numbers aren't stored, only the increments
412from one row to the next (this doesn't really work, but it's a start):
413
414 0, 1, 6, 1, 44, 5, 300, 300, 11, 1
415
416The second trick is that an unsigned byte can't hold negative values, or
417values larger than 255, so (a) there's a deep assumption that byte code
418offsets and their corresponding line #s both increase monotonically, and (b)
419if at least one column jumps by more than 255 from one row to the next, more
420than one pair is written to the table. In case #b, there's no way to know
421from looking at the table later how many were written. That's the delicate
422part. A user of c_lnotab desiring to find the source line number
423corresponding to a bytecode address A should do something like this
424
425 lineno = addr = 0
426 for addr_incr, line_incr in c_lnotab:
427 addr += addr_incr
428 if addr > A:
429 return lineno
430 lineno += line_incr
431
432In order for this to work, when the addr field increments by more than 255,
433the line # increment in each pair generated must be 0 until the remaining addr
434increment is < 256. So, in the example above, com_set_lineno should not (as
435was actually done until 2.2) expand 300, 300 to 255, 255, 45, 45, but to
436255, 0, 45, 255, 0, 45.
437*/
438
439int
440PyCode_Addr2Line(PyCodeObject *co, int addrq)
441{
442 int size = PyString_Size(co->co_lnotab) / 2;
443 unsigned char *p = (unsigned char*)PyString_AsString(co->co_lnotab);
444 int line = co->co_firstlineno;
445 int addr = 0;
446 while (--size >= 0) {
447 addr += *p++;
448 if (addr > addrq)
449 break;
450 line += *p++;
451 }
452 return line;
453}
Jeremy Hyltona4ebc132006-04-18 14:47:00 +0000454
455/*
456 Check whether the current instruction is at the start of a line.
457
458 */
459
460 /* The theory of SET_LINENO-less tracing.
461
462 In a nutshell, we use the co_lnotab field of the code object
463 to tell when execution has moved onto a different line.
464
    As mentioned above, the basic idea is to set things up so
    that
467
468 *instr_lb <= frame->f_lasti < *instr_ub
469
470 is true so long as execution does not change lines.
471
472 This is all fairly simple. Digging the information out of
473 co_lnotab takes some work, but is conceptually clear.
474
475 Somewhat harder to explain is why we don't *always* call the
476 line trace function when the above test fails.
477
478 Consider this code:
479
480 1: def f(a):
481 2: if a:
482 3: print 1
483 4: else:
484 5: print 2
485
486 which compiles to this:
487
488 2 0 LOAD_FAST 0 (a)
489 3 JUMP_IF_FALSE 9 (to 15)
490 6 POP_TOP
491
492 3 7 LOAD_CONST 1 (1)
493 10 PRINT_ITEM
494 11 PRINT_NEWLINE
495 12 JUMP_FORWARD 6 (to 21)
496 >> 15 POP_TOP
497
498 5 16 LOAD_CONST 2 (2)
499 19 PRINT_ITEM
500 20 PRINT_NEWLINE
501 >> 21 LOAD_CONST 0 (None)
502 24 RETURN_VALUE
503
504 If 'a' is false, execution will jump to instruction at offset
505 15 and the co_lnotab will claim that execution has moved to
506 line 3. This is at best misleading. In this case we could
507 associate the POP_TOP with line 4, but that doesn't make
508 sense in all cases (I think).
509
510 What we do is only call the line trace function if the co_lnotab
511 indicates we have jumped to the *start* of a line, i.e. if the
512 current instruction offset matches the offset given for the
513 start of a line by the co_lnotab.
514
515 This also takes care of the situation where 'a' is true.
516 Execution will jump from instruction offset 12 to offset 21.
517 Then the co_lnotab would imply that execution has moved to line
518 5, which is again misleading.
519
520 Why do we set f_lineno when tracing? Well, consider the code
521 above when 'a' is true. If stepping through this with 'n' in
522 pdb, you would stop at line 1 with a "call" type event, then
523 line events on lines 2 and 3, then a "return" type event -- but
524 you would be shown line 5 during this event. This is a change
525 from the behaviour in 2.2 and before, and I've found it
526 confusing in practice. By setting and using f_lineno when
527 tracing, one can report a line number different from that
528 suggested by f_lasti on this one occasion where it's desirable.
529 */
530
531
532int
533PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
534{
535 int size, addr, line;
536 unsigned char* p;
537
538 p = (unsigned char*)PyString_AS_STRING(co->co_lnotab);
539 size = PyString_GET_SIZE(co->co_lnotab) / 2;
540
541 addr = 0;
542 line = co->co_firstlineno;
543 assert(line > 0);
544
545 /* possible optimization: if f->f_lasti == instr_ub
546 (likely to be a common case) then we already know
547 instr_lb -- if we stored the matching value of p
548 somwhere we could skip the first while loop. */
549
550 /* see comments in compile.c for the description of
551 co_lnotab. A point to remember: increments to p
552 should come in pairs -- although we don't care about
553 the line increments here, treating them as byte
554 increments gets confusing, to say the least. */
555
556 while (size > 0) {
557 if (addr + *p > lasti)
558 break;
559 addr += *p++;
560 if (*p)
561 bounds->ap_lower = addr;
562 line += *p++;
563 --size;
564 }
565
566 /* If lasti and addr don't match exactly, we don't want to
567 change the lineno slot on the frame or execute a trace
568 function. Return -1 instead.
569 */
570 if (addr != lasti)
571 line = -1;
572
573 if (size > 0) {
574 while (--size >= 0) {
575 addr += *p++;
576 if (*p++)
577 break;
578 }
579 bounds->ap_upper = addr;
580 }
581 else {
582 bounds->ap_upper = INT_MAX;
583 }
584
585 return line;
586}