blob: 19dcc47f646f3e7fbf70a3c52000c6b24c3f52f8 [file] [log] [blame]
Jeremy Hylton3e0055f2005-10-20 19:59:25 +00001#include "Python.h"
2#include "code.h"
3#include "structmember.h"
4
#define NAME_CHARS \
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"

/* all_name_chars(s): return 1 when every byte of the NUL-terminated
   string s appears in NAME_CHARS, 0 otherwise.  The empty string yields 1.

   A 256-entry lookup table is filled in on the first call; the entry for
   the first NAME_CHARS character doubles as the "already built" flag. */

static int
all_name_chars(unsigned char *s)
{
    static char allowed[256];
    static const unsigned char *const alphabet =
        (const unsigned char *)NAME_CHARS;
    const unsigned char *cursor;

    /* Lazily populate the table the first time through. */
    if (!allowed[alphabet[0]]) {
        for (cursor = alphabet; *cursor != '\0'; cursor++)
            allowed[*cursor] = 1;
    }

    for (cursor = s; *cursor != '\0'; cursor++) {
        if (!allowed[*cursor])
            return 0;
    }
    return 1;
}
27
28static void
29intern_strings(PyObject *tuple)
30{
Martin v. Löwis18e16552006-02-15 17:27:45 +000031 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000032
33 for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
34 PyObject *v = PyTuple_GET_ITEM(tuple, i);
35 if (v == NULL || !PyString_CheckExact(v)) {
36 Py_FatalError("non-string found in code slot");
37 }
38 PyString_InternInPlace(&PyTuple_GET_ITEM(tuple, i));
39 }
40}
41
42
43PyCodeObject *
44PyCode_New(int argcount, int nlocals, int stacksize, int flags,
45 PyObject *code, PyObject *consts, PyObject *names,
46 PyObject *varnames, PyObject *freevars, PyObject *cellvars,
47 PyObject *filename, PyObject *name, int firstlineno,
48 PyObject *lnotab)
49{
50 PyCodeObject *co;
Martin v. Löwis18e16552006-02-15 17:27:45 +000051 Py_ssize_t i;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +000052 /* Check argument types */
53 if (argcount < 0 || nlocals < 0 ||
54 code == NULL ||
55 consts == NULL || !PyTuple_Check(consts) ||
56 names == NULL || !PyTuple_Check(names) ||
57 varnames == NULL || !PyTuple_Check(varnames) ||
58 freevars == NULL || !PyTuple_Check(freevars) ||
59 cellvars == NULL || !PyTuple_Check(cellvars) ||
60 name == NULL || !PyString_Check(name) ||
61 filename == NULL || !PyString_Check(filename) ||
62 lnotab == NULL || !PyString_Check(lnotab) ||
63 !PyObject_CheckReadBuffer(code)) {
64 PyErr_BadInternalCall();
65 return NULL;
66 }
67 intern_strings(names);
68 intern_strings(varnames);
69 intern_strings(freevars);
70 intern_strings(cellvars);
71 /* Intern selected string constants */
72 for (i = PyTuple_Size(consts); --i >= 0; ) {
73 PyObject *v = PyTuple_GetItem(consts, i);
74 if (!PyString_Check(v))
75 continue;
76 if (!all_name_chars((unsigned char *)PyString_AS_STRING(v)))
77 continue;
78 PyString_InternInPlace(&PyTuple_GET_ITEM(consts, i));
79 }
80 co = PyObject_NEW(PyCodeObject, &PyCode_Type);
81 if (co != NULL) {
82 co->co_argcount = argcount;
83 co->co_nlocals = nlocals;
84 co->co_stacksize = stacksize;
85 co->co_flags = flags;
86 Py_INCREF(code);
87 co->co_code = code;
88 Py_INCREF(consts);
89 co->co_consts = consts;
90 Py_INCREF(names);
91 co->co_names = names;
92 Py_INCREF(varnames);
93 co->co_varnames = varnames;
94 Py_INCREF(freevars);
95 co->co_freevars = freevars;
96 Py_INCREF(cellvars);
97 co->co_cellvars = cellvars;
98 Py_INCREF(filename);
99 co->co_filename = filename;
100 Py_INCREF(name);
101 co->co_name = name;
102 co->co_firstlineno = firstlineno;
103 Py_INCREF(lnotab);
104 co->co_lnotab = lnotab;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000105 co->co_zombieframe = NULL;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000106 }
107 return co;
108}
109
110
111#define OFF(x) offsetof(PyCodeObject, x)
112
113static PyMemberDef code_memberlist[] = {
114 {"co_argcount", T_INT, OFF(co_argcount), READONLY},
115 {"co_nlocals", T_INT, OFF(co_nlocals), READONLY},
116 {"co_stacksize",T_INT, OFF(co_stacksize), READONLY},
117 {"co_flags", T_INT, OFF(co_flags), READONLY},
118 {"co_code", T_OBJECT, OFF(co_code), READONLY},
119 {"co_consts", T_OBJECT, OFF(co_consts), READONLY},
120 {"co_names", T_OBJECT, OFF(co_names), READONLY},
121 {"co_varnames", T_OBJECT, OFF(co_varnames), READONLY},
122 {"co_freevars", T_OBJECT, OFF(co_freevars), READONLY},
123 {"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY},
124 {"co_filename", T_OBJECT, OFF(co_filename), READONLY},
125 {"co_name", T_OBJECT, OFF(co_name), READONLY},
126 {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
127 {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY},
128 {NULL} /* Sentinel */
129};
130
131/* Helper for code_new: return a shallow copy of a tuple that is
132 guaranteed to contain exact strings, by converting string subclasses
133 to exact strings and complaining if a non-string is found. */
134static PyObject*
135validate_and_copy_tuple(PyObject *tup)
136{
137 PyObject *newtuple;
138 PyObject *item;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000139 Py_ssize_t i, len;
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000140
141 len = PyTuple_GET_SIZE(tup);
142 newtuple = PyTuple_New(len);
143 if (newtuple == NULL)
144 return NULL;
145
146 for (i = 0; i < len; i++) {
147 item = PyTuple_GET_ITEM(tup, i);
148 if (PyString_CheckExact(item)) {
149 Py_INCREF(item);
150 }
151 else if (!PyString_Check(item)) {
152 PyErr_Format(
153 PyExc_TypeError,
154 "name tuples must contain only "
155 "strings, not '%.500s'",
156 item->ob_type->tp_name);
157 Py_DECREF(newtuple);
158 return NULL;
159 }
160 else {
161 item = PyString_FromStringAndSize(
162 PyString_AS_STRING(item),
163 PyString_GET_SIZE(item));
164 if (item == NULL) {
165 Py_DECREF(newtuple);
166 return NULL;
167 }
168 }
169 PyTuple_SET_ITEM(newtuple, i, item);
170 }
171
172 return newtuple;
173}
174
175PyDoc_STRVAR(code_doc,
176"code(argcount, nlocals, stacksize, flags, codestring, constants, names,\n\
177 varnames, filename, name, firstlineno, lnotab[, freevars[, cellvars]])\n\
178\n\
179Create a code object. Not for the faint of heart.");
180
181static PyObject *
182code_new(PyTypeObject *type, PyObject *args, PyObject *kw)
183{
184 int argcount;
185 int nlocals;
186 int stacksize;
187 int flags;
188 PyObject *co = NULL;
189 PyObject *code;
190 PyObject *consts;
191 PyObject *names, *ournames = NULL;
192 PyObject *varnames, *ourvarnames = NULL;
193 PyObject *freevars = NULL, *ourfreevars = NULL;
194 PyObject *cellvars = NULL, *ourcellvars = NULL;
195 PyObject *filename;
196 PyObject *name;
197 int firstlineno;
198 PyObject *lnotab;
199
200 if (!PyArg_ParseTuple(args, "iiiiSO!O!O!SSiS|O!O!:code",
201 &argcount, &nlocals, &stacksize, &flags,
202 &code,
203 &PyTuple_Type, &consts,
204 &PyTuple_Type, &names,
205 &PyTuple_Type, &varnames,
206 &filename, &name,
207 &firstlineno, &lnotab,
208 &PyTuple_Type, &freevars,
209 &PyTuple_Type, &cellvars))
210 return NULL;
211
212 if (argcount < 0) {
213 PyErr_SetString(
214 PyExc_ValueError,
215 "code: argcount must not be negative");
216 goto cleanup;
217 }
218
219 if (nlocals < 0) {
220 PyErr_SetString(
221 PyExc_ValueError,
222 "code: nlocals must not be negative");
223 goto cleanup;
224 }
225
226 ournames = validate_and_copy_tuple(names);
227 if (ournames == NULL)
228 goto cleanup;
229 ourvarnames = validate_and_copy_tuple(varnames);
230 if (ourvarnames == NULL)
231 goto cleanup;
232 if (freevars)
233 ourfreevars = validate_and_copy_tuple(freevars);
234 else
235 ourfreevars = PyTuple_New(0);
236 if (ourfreevars == NULL)
237 goto cleanup;
238 if (cellvars)
239 ourcellvars = validate_and_copy_tuple(cellvars);
240 else
241 ourcellvars = PyTuple_New(0);
242 if (ourcellvars == NULL)
243 goto cleanup;
244
245 co = (PyObject *)PyCode_New(argcount, nlocals, stacksize, flags,
246 code, consts, ournames, ourvarnames,
247 ourfreevars, ourcellvars, filename,
248 name, firstlineno, lnotab);
249 cleanup:
250 Py_XDECREF(ournames);
251 Py_XDECREF(ourvarnames);
252 Py_XDECREF(ourfreevars);
253 Py_XDECREF(ourcellvars);
254 return co;
255}
256
257static void
258code_dealloc(PyCodeObject *co)
259{
260 Py_XDECREF(co->co_code);
261 Py_XDECREF(co->co_consts);
262 Py_XDECREF(co->co_names);
263 Py_XDECREF(co->co_varnames);
264 Py_XDECREF(co->co_freevars);
265 Py_XDECREF(co->co_cellvars);
266 Py_XDECREF(co->co_filename);
267 Py_XDECREF(co->co_name);
268 Py_XDECREF(co->co_lnotab);
Thomas Wouters477c8d52006-05-27 19:21:47 +0000269 if (co->co_zombieframe != NULL)
270 PyObject_GC_Del(co->co_zombieframe);
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000271 PyObject_DEL(co);
272}
273
274static PyObject *
275code_repr(PyCodeObject *co)
276{
277 char buf[500];
278 int lineno = -1;
279 char *filename = "???";
280 char *name = "???";
281
282 if (co->co_firstlineno != 0)
283 lineno = co->co_firstlineno;
284 if (co->co_filename && PyString_Check(co->co_filename))
285 filename = PyString_AS_STRING(co->co_filename);
286 if (co->co_name && PyString_Check(co->co_name))
287 name = PyString_AS_STRING(co->co_name);
288 PyOS_snprintf(buf, sizeof(buf),
289 "<code object %.100s at %p, file \"%.300s\", line %d>",
290 name, co, filename, lineno);
291 return PyString_FromString(buf);
292}
293
Guido van Rossum47b9ff62006-08-24 00:41:19 +0000294static PyObject *
295code_richcompare(PyObject *self, PyObject *other, int op)
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000296{
Guido van Rossum47b9ff62006-08-24 00:41:19 +0000297 /* Temporarily make this unsupported */
298 _Py_Break();
299 Py_INCREF(Py_NotImplemented);
300 return Py_NotImplemented;
301
302#if 0
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000303 int cmp;
304 cmp = PyObject_Compare(co->co_name, cp->co_name);
305 if (cmp) return cmp;
306 cmp = co->co_argcount - cp->co_argcount;
307 if (cmp) goto normalize;
308 cmp = co->co_nlocals - cp->co_nlocals;
309 if (cmp) goto normalize;
310 cmp = co->co_flags - cp->co_flags;
311 if (cmp) goto normalize;
312 cmp = co->co_firstlineno - cp->co_firstlineno;
313 if (cmp) goto normalize;
314 cmp = PyObject_Compare(co->co_code, cp->co_code);
315 if (cmp) return cmp;
316 cmp = PyObject_Compare(co->co_consts, cp->co_consts);
317 if (cmp) return cmp;
318 cmp = PyObject_Compare(co->co_names, cp->co_names);
319 if (cmp) return cmp;
320 cmp = PyObject_Compare(co->co_varnames, cp->co_varnames);
321 if (cmp) return cmp;
322 cmp = PyObject_Compare(co->co_freevars, cp->co_freevars);
323 if (cmp) return cmp;
324 cmp = PyObject_Compare(co->co_cellvars, cp->co_cellvars);
325 return cmp;
326
327 normalize:
328 if (cmp > 0)
329 return 1;
330 else if (cmp < 0)
331 return -1;
332 else
333 return 0;
Guido van Rossum47b9ff62006-08-24 00:41:19 +0000334#endif
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000335}
336
337static long
338code_hash(PyCodeObject *co)
339{
340 long h, h0, h1, h2, h3, h4, h5, h6;
341 h0 = PyObject_Hash(co->co_name);
342 if (h0 == -1) return -1;
343 h1 = PyObject_Hash(co->co_code);
344 if (h1 == -1) return -1;
345 h2 = PyObject_Hash(co->co_consts);
346 if (h2 == -1) return -1;
347 h3 = PyObject_Hash(co->co_names);
348 if (h3 == -1) return -1;
349 h4 = PyObject_Hash(co->co_varnames);
350 if (h4 == -1) return -1;
351 h5 = PyObject_Hash(co->co_freevars);
352 if (h5 == -1) return -1;
353 h6 = PyObject_Hash(co->co_cellvars);
354 if (h6 == -1) return -1;
355 h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^
356 co->co_argcount ^ co->co_nlocals ^ co->co_flags;
357 if (h == -1) h = -2;
358 return h;
359}
360
361/* XXX code objects need to participate in GC? */
362
363PyTypeObject PyCode_Type = {
364 PyObject_HEAD_INIT(&PyType_Type)
365 0,
366 "code",
367 sizeof(PyCodeObject),
368 0,
369 (destructor)code_dealloc, /* tp_dealloc */
370 0, /* tp_print */
371 0, /* tp_getattr */
372 0, /* tp_setattr */
Guido van Rossum47b9ff62006-08-24 00:41:19 +0000373 0, /* tp_compare */
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000374 (reprfunc)code_repr, /* tp_repr */
375 0, /* tp_as_number */
376 0, /* tp_as_sequence */
377 0, /* tp_as_mapping */
Guido van Rossum47b9ff62006-08-24 00:41:19 +0000378 0, /* tp_hash */
Jeremy Hylton3e0055f2005-10-20 19:59:25 +0000379 0, /* tp_call */
380 0, /* tp_str */
381 PyObject_GenericGetAttr, /* tp_getattro */
382 0, /* tp_setattro */
383 0, /* tp_as_buffer */
384 Py_TPFLAGS_DEFAULT, /* tp_flags */
385 code_doc, /* tp_doc */
386 0, /* tp_traverse */
387 0, /* tp_clear */
388 0, /* tp_richcompare */
389 0, /* tp_weaklistoffset */
390 0, /* tp_iter */
391 0, /* tp_iternext */
392 0, /* tp_methods */
393 code_memberlist, /* tp_members */
394 0, /* tp_getset */
395 0, /* tp_base */
396 0, /* tp_dict */
397 0, /* tp_descr_get */
398 0, /* tp_descr_set */
399 0, /* tp_dictoffset */
400 0, /* tp_init */
401 0, /* tp_alloc */
402 code_new, /* tp_new */
403};
404
405/* All about c_lnotab.
406
407c_lnotab is an array of unsigned bytes disguised as a Python string. In -O
408mode, SET_LINENO opcodes aren't generated, and bytecode offsets are mapped
409to source code line #s (when needed for tracebacks) via c_lnotab instead.
410The array is conceptually a list of
411 (bytecode offset increment, line number increment)
412pairs. The details are important and delicate, best illustrated by example:
413
    byte code offset    source code line number
        0                   1
        6                   2
       50                   7
      350                 307
      361                 308
420
421The first trick is that these numbers aren't stored, only the increments
422from one row to the next (this doesn't really work, but it's a start):
423
424 0, 1, 6, 1, 44, 5, 300, 300, 11, 1
425
426The second trick is that an unsigned byte can't hold negative values, or
427values larger than 255, so (a) there's a deep assumption that byte code
428offsets and their corresponding line #s both increase monotonically, and (b)
429if at least one column jumps by more than 255 from one row to the next, more
430than one pair is written to the table. In case #b, there's no way to know
431from looking at the table later how many were written. That's the delicate
432part. A user of c_lnotab desiring to find the source line number
433corresponding to a bytecode address A should do something like this
434
    lineno = addr = 0
    for addr_incr, line_incr in c_lnotab:
        addr += addr_incr
        if addr > A:
            return lineno
        lineno += line_incr
441
442In order for this to work, when the addr field increments by more than 255,
443the line # increment in each pair generated must be 0 until the remaining addr
444increment is < 256. So, in the example above, com_set_lineno should not (as
445was actually done until 2.2) expand 300, 300 to 255, 255, 45, 45, but to
446255, 0, 45, 255, 0, 45.
447*/
448
449int
450PyCode_Addr2Line(PyCodeObject *co, int addrq)
451{
452 int size = PyString_Size(co->co_lnotab) / 2;
453 unsigned char *p = (unsigned char*)PyString_AsString(co->co_lnotab);
454 int line = co->co_firstlineno;
455 int addr = 0;
456 while (--size >= 0) {
457 addr += *p++;
458 if (addr > addrq)
459 break;
460 line += *p++;
461 }
462 return line;
463}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000464
465/*
466 Check whether the current instruction is at the start of a line.
467
468 */
469
470 /* The theory of SET_LINENO-less tracing.
471
472 In a nutshell, we use the co_lnotab field of the code object
473 to tell when execution has moved onto a different line.
474
   As mentioned above, the basic idea is to set things up so
   that
477
478 *instr_lb <= frame->f_lasti < *instr_ub
479
480 is true so long as execution does not change lines.
481
482 This is all fairly simple. Digging the information out of
483 co_lnotab takes some work, but is conceptually clear.
484
485 Somewhat harder to explain is why we don't *always* call the
486 line trace function when the above test fails.
487
   Consider this code:

   1: def f(a):
   2:     if a:
   3:         print 1
   4:     else:
   5:         print 2

   which compiles to this:

   2           0 LOAD_FAST                0 (a)
               3 JUMP_IF_FALSE            9 (to 15)
               6 POP_TOP

   3           7 LOAD_CONST               1 (1)
              10 PRINT_ITEM
              11 PRINT_NEWLINE
              12 JUMP_FORWARD             6 (to 21)
         >>   15 POP_TOP

   5          16 LOAD_CONST               2 (2)
              19 PRINT_ITEM
              20 PRINT_NEWLINE
         >>   21 LOAD_CONST               0 (None)
              24 RETURN_VALUE
513
514 If 'a' is false, execution will jump to instruction at offset
515 15 and the co_lnotab will claim that execution has moved to
516 line 3. This is at best misleading. In this case we could
517 associate the POP_TOP with line 4, but that doesn't make
518 sense in all cases (I think).
519
520 What we do is only call the line trace function if the co_lnotab
521 indicates we have jumped to the *start* of a line, i.e. if the
522 current instruction offset matches the offset given for the
523 start of a line by the co_lnotab.
524
525 This also takes care of the situation where 'a' is true.
526 Execution will jump from instruction offset 12 to offset 21.
527 Then the co_lnotab would imply that execution has moved to line
528 5, which is again misleading.
529
530 Why do we set f_lineno when tracing? Well, consider the code
531 above when 'a' is true. If stepping through this with 'n' in
532 pdb, you would stop at line 1 with a "call" type event, then
533 line events on lines 2 and 3, then a "return" type event -- but
534 you would be shown line 5 during this event. This is a change
535 from the behaviour in 2.2 and before, and I've found it
536 confusing in practice. By setting and using f_lineno when
537 tracing, one can report a line number different from that
538 suggested by f_lasti on this one occasion where it's desirable.
539 */
540
541
542int
543PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
544{
545 int size, addr, line;
546 unsigned char* p;
547
548 p = (unsigned char*)PyString_AS_STRING(co->co_lnotab);
549 size = PyString_GET_SIZE(co->co_lnotab) / 2;
550
551 addr = 0;
552 line = co->co_firstlineno;
553 assert(line > 0);
554
555 /* possible optimization: if f->f_lasti == instr_ub
556 (likely to be a common case) then we already know
557 instr_lb -- if we stored the matching value of p
558 somwhere we could skip the first while loop. */
559
560 /* see comments in compile.c for the description of
561 co_lnotab. A point to remember: increments to p
562 should come in pairs -- although we don't care about
563 the line increments here, treating them as byte
564 increments gets confusing, to say the least. */
565
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000566 bounds->ap_lower = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000567 while (size > 0) {
568 if (addr + *p > lasti)
569 break;
570 addr += *p++;
571 if (*p)
572 bounds->ap_lower = addr;
573 line += *p++;
574 --size;
575 }
576
577 /* If lasti and addr don't match exactly, we don't want to
578 change the lineno slot on the frame or execute a trace
579 function. Return -1 instead.
580 */
581 if (addr != lasti)
582 line = -1;
583
584 if (size > 0) {
585 while (--size >= 0) {
586 addr += *p++;
587 if (*p++)
588 break;
589 }
590 bounds->ap_upper = addr;
591 }
592 else {
593 bounds->ap_upper = INT_MAX;
594 }
595
596 return line;
597}