Move the bytecode optimizer upstream so that its results are saved in .pyc
files and the bytecode is not re-optimized upon import.  This saves a bit of
startup time while the optimizer still remains decoupled from the rest of
the compiler.

As a side benefit, handcoded bytecode is not run through the optimizer
when new code objects are created.  Hopefully, a handcoder has already
created exactly what they want to have run.

(Idea suggested by Armin Rigo and Michael Hudson.  It was initially avoided
 because of worries about compiler coupling; however, only the nexus
 point needed to be moved, so there won't be a conflict when the AST
 branch is loaded.)
diff --git a/Python/compile.c b/Python/compile.c
index fbb91f7..12ab03e 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -625,7 +625,8 @@
 		co->co_nlocals = nlocals;
 		co->co_stacksize = stacksize;
 		co->co_flags = flags;
-		co->co_code = optimize_code(code, consts, names);
+		Py_INCREF(code);
+		co->co_code = code;
 		Py_INCREF(consts);
 		co->co_consts = consts;
 		Py_INCREF(names);
@@ -4791,7 +4792,7 @@
 	com_done(&sc);
 	if (sc.c_errors == 0) {
 		PyObject *consts, *names, *varnames, *filename, *name,
-			*freevars, *cellvars;
+			*freevars, *cellvars, *code;
 		consts = PyList_AsTuple(sc.c_consts);
 		names = PyList_AsTuple(sc.c_names);
 		varnames = PyList_AsTuple(sc.c_varnames);
@@ -4800,12 +4801,13 @@
 					     PyTuple_GET_SIZE(cellvars));
 		filename = PyString_InternFromString(sc.c_filename);
 		name = PyString_InternFromString(sc.c_name);
+		code = optimize_code(sc.c_code, consts, names);
 		if (!PyErr_Occurred())
 			co = PyCode_New(sc.c_argcount,
 					sc.c_nlocals,
 					sc.c_maxstacklevel,
 					sc.c_flags,
-					sc.c_code,
+					code,
 					consts,
 					names,
 					varnames,
@@ -4822,6 +4824,7 @@
 		Py_XDECREF(cellvars);
 		Py_XDECREF(filename);
 		Py_XDECREF(name);
+		Py_XDECREF(code);
 	}
 	else if (!PyErr_Occurred()) {
 		/* This could happen if someone called PyErr_Clear() after an