Add periodic CPython garbage collector statistics logging to aid in
tracking down a memory leak and as a general health beacon for the
long-running process.

The interval at which stats are logged is configurable.

Signed-off-by: Gregory Smith <gps@google.com>


git-svn-id: http://test.kernel.org/svn/autotest/trunk@4021 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/scheduler/gc_stats.py b/scheduler/gc_stats.py
new file mode 100644
index 0000000..35e5302
--- /dev/null
+++ b/scheduler/gc_stats.py
@@ -0,0 +1,89 @@
+# Compute and gather statistics about garbage collection in this process.
+# This module depends on the CPython gc module and garbage collection behavior.
+
+import gc, logging, pprint
+
+
+verbose = False  # When True, also log a pretty printed list of new objects.
+
+
+# A mapping from type objects to a count of instances of those types in the
+# garbage collector's all objects list on the previous call to
+# _log_garbage_collector_stats().
+_previous_obj_type_map = {}
+
+
+# A set of object ids for everything in the all objects list on the
+# previous verbose call to _log_garbage_collector_stats().
+_previous_obj_ids = set()
+
+
+def _log_garbage_collector_stats(minimum_count=10):
+    """
+    Log statistics about how many of what type of Python object exist in this
+    process and how those counts changed since the previous call.
+
+    @param minimum_count: The minimum number of instances of a type for it
+            to be considered worthy of logging.
+    """
+    global _previous_obj_type_map, _previous_obj_ids
+
+    # We get all objects -before- creating any new objects within this
+    # function, to avoid having our own local instances in the list.
+    all_objects = gc.get_objects()
+    obj = None
+    new_objects = []
+    try:
+        obj_type_map = {}
+        object_ids = set()
+        for obj in all_objects:
+            obj_type = type(obj)
+            obj_type_map[obj_type] = obj_type_map.get(obj_type, 0) + 1
+            object_ids.add(id(obj))
+        whats_new_big_str = ''
+        if verbose and _previous_obj_ids:
+            new_object_ids = object_ids - _previous_obj_ids
+            for obj in all_objects:
+                if id(obj) in new_object_ids:
+                    new_objects.append(obj)
+            whats_new_big_str = pprint.pformat(new_objects, indent=1)
+    finally:
+        # Never keep references to stuff returned by gc.get_objects() around
+        # or it'll just make the future cyclic gc runs more difficult.
+        del all_objects
+        del obj
+        del new_objects
+
+    # Per-type change in count since the previous call (unchanged omitted).
+    delta = {}
+    for obj_type, count in obj_type_map.iteritems():
+        if obj_type not in _previous_obj_type_map:
+            delta[obj_type] = count
+        elif _previous_obj_type_map[obj_type] != count:
+            delta[obj_type] = count - _previous_obj_type_map[obj_type]
+
+    sorted_stats = reversed(sorted(
+            (count, obj_type) for obj_type, count in obj_type_map.iteritems()))
+    sorted_delta = reversed(sorted(
+            (count, obj_type) for obj_type, count in delta.iteritems()))
+
+    logging.debug('Garbage collector object type counts:')
+    for count, obj_type in sorted_stats:
+        if count >= minimum_count:
+            logging.debug('  %d\t%s', count, obj_type)
+
+    logging.info('Change in object counts since previous GC stats:')
+    for change, obj_type in sorted_delta:
+        # Use the same inclusive threshold as the counts listing above.
+        if obj_type_map[obj_type] >= minimum_count:
+            logging.info('  %+d\t%s\tto %d', change, obj_type,
+                         obj_type_map[obj_type])
+
+    if verbose and whats_new_big_str:
+        logging.debug('Pretty printed representation of the new objects:')
+        logging.debug(whats_new_big_str)
+
+    _previous_obj_type_map = obj_type_map
+    # Object ids are only remembered in verbose mode, their sole consumer.
+    if verbose:
+        _previous_obj_ids = object_ids