Instead of lazily initializing and interning the strings passed to the
profile and trace functions, which adds extra argument pushing and checks
to the C-level overhead of profiling/tracing, create the strings semi-lazily,
when Python code first registers a profile or trace function.  This
simplifies the trampoline into the profile/trace functions.
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 52fbbc8..62e0841 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -196,10 +196,14 @@
 \n\
 Set the current default string encoding used by the Unicode implementation.";
 
+extern int _PyTrace_Init(void);
+
 static PyObject *
 sys_settrace(PyObject *self, PyObject *args)
 {
 	PyThreadState *tstate = PyThreadState_Get();
+	if (_PyTrace_Init() == -1)
+		return NULL;
 	if (args == Py_None)
 		args = NULL;
 	else
@@ -220,6 +224,8 @@
 sys_setprofile(PyObject *self, PyObject *args)
 {
 	PyThreadState *tstate = PyThreadState_Get();
+	if (_PyTrace_Init() == -1)
+		return NULL;
 	if (args == Py_None)
 		args = NULL;
 	else