bpo-26219: per opcode cache for LOAD_GLOBAL (GH-12884)

This patch implements a per-opcode cache mechanism and uses it
only for the LOAD_GLOBAL opcode.

Based on Yury's opcache3.patch in bpo-26219.
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 3de5528..fc7e551 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -1242,6 +1242,9 @@
     /* Destroy all modules */
     PyImport_Cleanup();
 
+    /* Print debug stats if any */
+    _PyEval_Fini();
+
     /* Flush sys.stdout and sys.stderr (again, in case more was printed) */
     if (flush_std_files() < 0) {
         status = -1;