Issue 1696199: Add collections.Counter().
diff --git a/Lib/collections.py b/Lib/collections.py
index ace2b2a..ff49844 100644
--- a/Lib/collections.py
+++ b/Lib/collections.py
@@ -9,6 +9,11 @@
 from operator import itemgetter as _itemgetter
 from keyword import iskeyword as _iskeyword
 import sys as _sys
+import heapq as _heapq
+import itertools as _itertools
+
+########################################################################
+###  namedtuple  #######################################################
 
 def namedtuple(typename, field_names, verbose=False):
     """Returns a new subclass of tuple with named fields.
@@ -108,7 +113,160 @@
     return result
 
 
+########################################################################
+###  Counter  ##########################################################
 
+class Counter(dict):
+    '''Dict subclass for counting hashable items.  Sometimes called a bag
+    or multiset.  Elements are stored as dictionary keys and their counts
+    are stored as dictionary values.
+
+    >>> c = Counter('abracadabra')      # count elements from a string
+
+    >>> c.most_common(3)                # three most common elements
+    [('a', 5), ('r', 2), ('b', 2)]
+    >>> sorted(c)                       # list all unique elements
+    ['a', 'b', 'c', 'd', 'r']
+    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
+    'aaaaabbcdrr'
+    >>> sum(c.values())                 # total of all counts
+    11
+
+    >>> c['a']                          # count of letter 'a'
+    5
+    >>> for elem in 'shazam':           # update counts from an iterable
+    ...     c[elem] += 1                # by adding 1 to each element's count
+    >>> c['a']                          # now there are seven 'a'
+    7
+    >>> del c['r']                      # remove all 'r'
+    >>> c['r']                          # now there are zero 'r'
+    0
+
+    >>> d = Counter('simsalabim')       # make another counter
+    >>> c.update(d)                     # add in the second counter
+    >>> c['a']                          # now there are nine 'a'
+    9
+
+    >>> c.clear()                       # empty the counter
+    >>> c
+    Counter()
+
+    Note:  If a count is set to zero or reduced to zero, it will remain
+    in the counter until the entry is deleted or the counter is cleared:
+
+    >>> c = Counter('aaabbc')
+    >>> c['b'] -= 2                     # reduce the count of 'b' by two
+    >>> c.most_common()                 # 'b' is still in, but its count is zero
+    [('a', 3), ('c', 1), ('b', 0)]
+
+    '''
+    # References:
+    #   http://en.wikipedia.org/wiki/Multiset
+    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
+    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
+    #   http://code.activestate.com/recipes/259174/
+    #   Knuth, TAOCP Vol. II section 4.6.3
+
+    def __init__(self, iterable=None, items=None):
+        '''Create a new, empty Counter object.  And if given, count elements
+        from an input iterable.  Or, initialize the count from an items list
+        of (element, count) pairs.
+
+        >>> c = Counter('hocus pocus')              # count elements in an iterable
+        >>> c = Counter(items=[('a', 4), ('b', 2)]) # take counts from an items list
+
+        '''
+        if iterable is not None:
+            for elem in iterable:
+                self[elem] += 1
+        if items is not None:
+            for elem, count in items:
+                self[elem] += count
+
+    def __missing__(self, key):
+        'The count of elements not in the Counter is zero.'
+        # Needed so that self[missing_item] does not raise KeyError
+        return 0
+
+    def most_common(self, n=None):
+        '''List the n most common elements and their counts from the most
+        common to the least.  If n is None, then list all element counts.
+
+        >>> Counter('abracadabra').most_common(3)
+        [('a', 5), ('r', 2), ('b', 2)]
+
+        '''
+        # Emulate Bag.sortedByCount from Smalltalk
+        if n is None:
+            return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
+        return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
+
+    def elements(self):
+        '''Iterator over elements repeating each as many times as its count.
+
+        >>> c = Counter('ABCABC')
+        >>> sorted(c.elements())
+        ['A', 'A', 'B', 'B', 'C', 'C']
+
+        # Knuth's example of prime factors of 1836:  2**2 * 3**3 * 17**1
+        >>> import operator
+        >>> prime_factors = Counter(items=[(2,2), (3,3), (17,1)])
+        >>> sorted(prime_factors.elements())         # list individual factors
+        [2, 2, 3, 3, 3, 17]
+        >>> reduce(operator.mul, prime_factors.elements(), 1)  # multiply them
+        1836
+
+        Note, if an element's count has been set to zero or a negative number,
+        elements() will ignore it.
+
+        '''
+        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
+        return _itertools.chain.from_iterable(
+                                    _itertools.starmap(_itertools.repeat,
+                                                       self.iteritems()))
+
+    # Override dict methods where necessary
+
+    @classmethod
+    def fromkeys(cls, iterable, v=None):
+        # There is no equivalent method for counters because setting v=1
+        # means that no element can have a count greater than one.
+        raise NotImplementedError(
+            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')
+
+    def update(self, mapping):
+        '''Like dict.update() but add counts instead of replacing them.
+
+        Source can be another dictionary or a Counter.instance().
+
+        >>> c = Counter('which')
+        >>> d = Counter('witch')
+        >>> c.update(d)                 # Add counts from d to those in c
+        >>> c['h']                      # Count of 'h' is now three
+        3
+
+        '''
+        # The regular dict.update() operation makes no sense here because the
+        # replace behavior results in the some of original untouched counts
+        # being mixed-in with all of the other counts for a mismash that
+        # doesn't have a straight-forward interpretation in most counting
+        # contexts.  Instead, we look to Knuth for suggested operations on
+        # multisets and implement the union-add operation discussed in
+        # TAOCP Volume II section 4.6.3 exercise 19.  The Wikipedia entry for
+        # multisets calls that operation a sum or join.
+        for elem, count in mapping.iteritems():
+            self[elem] += count
+
+    def copy(self):
+        'Like dict.copy() but returns a Counter instance instead of a dict.'
+        c = Counter()
+        c.update(self)
+        return c
+
+    def __repr__(self):
+        if not self:
+            return '%s()' % self.__class__.__name__
+        return '%s(items=%r)' % (self.__class__.__name__, self.most_common())
 
 
 
@@ -143,6 +301,49 @@
     Point3D = namedtuple('Point3D', Point._fields + ('z',))
     print Point3D.__doc__
 
+    # Check that counters are copyable, deepcopyable, picklable, and have a
+    # repr/eval round-trip
+    import copy
+    words = Counter('which witch had which witches wrist watch'.split())
+    update_test = Counter()
+    update_test.update(words)
+    for i, dup in enumerate([
+                words.copy(),
+                copy.copy(words),
+                copy.deepcopy(words),
+                loads(dumps(words, 0)),
+                loads(dumps(words, 1)),
+                loads(dumps(words, 2)),
+                loads(dumps(words, -1)),
+                eval(repr(words)),
+                update_test,
+                ]):
+        msg = (i, dup, words)
+        assert dup is not words, msg
+        assert dup == words, msg
+        assert len(dup) == len(words), msg
+        assert type(dup) == type(words), msg
+
+    # Verify that counters are unhashable
+    try:
+        hash(words)
+    except TypeError:
+        pass
+    else:
+        print 'Failed hashing test'
+
+    # Verify that Counter.fromkeys() is disabled
+    try:
+        Counter.fromkeys('razmataz')
+    except NotImplementedError:
+        pass
+    else:
+        print 'Failed fromkeys() test'
+
+    # Check ABCs
+    assert issubclass(Counter, Mapping)
+    assert isinstance(Counter('asdfasdf'), Mapping)
+
     import doctest
     TestResults = namedtuple('TestResults', 'failed attempted')
     print TestResults(*doctest.testmod())
diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
index 7dffd73..00882e2 100644
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@@ -1,6 +1,6 @@
 import unittest, doctest
 from test import test_support
-from collections import namedtuple
+from collections import namedtuple, Counter, Mapping
 import pickle, cPickle, copy
 from collections import Hashable, Iterable, Iterator
 from collections import Sized, Container, Callable
@@ -346,11 +346,107 @@
             self.failUnless(issubclass(sample, MutableSequence))
         self.failIf(issubclass(basestring, MutableSequence))
 
+class TestCounter(unittest.TestCase):
+
+    def test_basics(self):
+        c = Counter('abcaba')
+        self.assert_(isinstance(c, dict))
+        self.assert_(isinstance(c, Mapping))
+        self.assert_(issubclass(Counter, dict))
+        self.assert_(issubclass(Counter, Mapping))
+        self.assertEqual(len(c), 3)
+        self.assertEqual(sum(c.values()), 6)
+        self.assertEqual(sorted(c.values()), [1, 2, 3])
+        self.assertEqual(sorted(c.keys()), ['a', 'b', 'c'])
+        self.assertEqual(sorted(c), ['a', 'b', 'c'])
+        self.assertEqual(sorted(c.items()),
+                         [('a', 3), ('b', 2), ('c', 1)])
+        self.assertEqual(c['b'], 2)
+        self.assertEqual(c['z'], 0)
+        self.assertEqual(c.has_key('c'), True)
+        self.assertEqual(c.has_key('z'), False)
+        self.assertEqual(c.__contains__('c'), True)
+        self.assertEqual(c.__contains__('z'), False)
+        self.assertEqual(c.get('b', 10), 2)
+        self.assertEqual(c.get('z', 10), 10)
+        self.assertEqual(c, dict(a=3, b=2, c=1))
+        self.assertEqual(repr(c),
+                         "Counter(items=[('a', 3), ('b', 2), ('c', 1)])")
+        self.assertEqual(c.most_common(), [('a', 3), ('b', 2), ('c', 1)])
+        for i in range(5):
+            self.assertEqual(c.most_common(i),
+                             [('a', 3), ('b', 2), ('c', 1)][:i])
+        self.assertEqual(''.join(sorted(c.elements())), 'aaabbc')
+        c['a'] += 1         # increment an existing value
+        c['b'] -= 2         # sub existing value to zero
+        del c['c']          # remove an entry
+        c['d'] -= 2         # sub from a missing value
+        c['e'] = -5         # directly assign a missing value
+        c['f'] += 4         # add to a missing value
+        self.assertEqual(c, dict(a=4, b=0, d=-2, e=-5, f=4))
+        self.assertEqual(''.join(sorted(c.elements())), 'aaaaffff')
+        self.assertEqual(c.pop('f'), 4)
+        self.assertEqual('f' in c, False)
+        for i in range(3):
+            elem, cnt = c.popitem()
+            self.assertEqual(elem in c, False)
+        c.clear()
+        self.assertEqual(c, {})
+        self.assertEqual(repr(c), 'Counter()')
+        self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
+        self.assertRaises(TypeError, hash, c)
+        c.update(dict(a=5, b=3, c=1))
+        c.update(Counter(items=[('a', 50), ('b', 30)]))
+        c.__init__(items=[('a', 500), ('b', 300)])
+        c.__init__('cdc')
+        self.assertEqual(c, dict(a=555, b=333, c=3, d=1))
+        self.assertEqual(c.setdefault('d', 5), 1)
+        self.assertEqual(c['d'], 1)
+        self.assertEqual(c.setdefault('e', 5), 5)
+        self.assertEqual(c['e'], 5)
+
+    def test_copying(self):
+        # Check that counters are copyable, deepcopyable, picklable, and
+        #have a repr/eval round-trip
+        words = Counter('which witch had which witches wrist watch'.split())
+        update_test = Counter()
+        update_test.update(words)
+        for i, dup in enumerate([
+                    words.copy(),
+                    copy.copy(words),
+                    copy.deepcopy(words),
+                    pickle.loads(pickle.dumps(words, 0)),
+                    pickle.loads(pickle.dumps(words, 1)),
+                    pickle.loads(pickle.dumps(words, 2)),
+                    pickle.loads(pickle.dumps(words, -1)),
+                    cPickle.loads(cPickle.dumps(words, 0)),
+                    cPickle.loads(cPickle.dumps(words, 1)),
+                    cPickle.loads(cPickle.dumps(words, 2)),
+                    cPickle.loads(cPickle.dumps(words, -1)),
+                    eval(repr(words)),
+                    update_test,
+                    ]):
+            msg = (i, dup, words)
+            self.assert_(dup is not words)
+            self.assertEquals(dup, words)
+            self.assertEquals(len(dup), len(words))
+            self.assertEquals(type(dup), type(words))
+
+    def test_conversions(self):
+        # Convert to: set, list, dict
+        s = 'she sells sea shells by the sea shore'
+        self.assertEqual(sorted(Counter(s).elements()), sorted(s))
+        self.assertEqual(sorted(Counter(s)), sorted(set(s)))
+        self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
+        self.assertEqual(set(Counter(s)), set(s))
+
+
 import doctest, collections
 
 def test_main(verbose=None):
     NamedTupleDocs = doctest.DocTestSuite(module=collections)
-    test_classes = [TestNamedTuple, NamedTupleDocs, TestOneTrickPonyABCs, TestCollectionABCs]
+    test_classes = [TestNamedTuple, NamedTupleDocs, TestOneTrickPonyABCs,
+                    TestCollectionABCs, TestCounter]
     test_support.run_unittest(*test_classes)
     test_support.run_doctest(collections, verbose)