Implementation of PEP 3101, Advanced String Formatting.

Known issues:

The string.Formatter class, as discussed in the PEP, is incomplete.

Error handling needs to conform to the PEP.

Need to fix this warning that I introduced in Python/formatter_unicode.c:
Objects/stringlib/unicodedefs.h:26: warning: `STRINGLIB_CMP' defined but not used

Need to make sure sign formatting is correct, more tests needed.

Need to remove '()' sign formatting, left over from an earlier version of the PEP.
diff --git a/Lib/string.py b/Lib/string.py
index 87073aa..9df78af 100644
--- a/Lib/string.py
+++ b/Lib/string.py
@@ -189,3 +189,42 @@
             raise ValueError('Unrecognized named group in pattern',
                              self.pattern)
         return self.pattern.sub(convert, self.template)
+
+
+
+########################################################################
+# the Formatter class
+# see PEP 3101 for details and purpose of this class
+
+# The hard parts are reused from the C implementation.  They're
+# exposed here via the sys module.  sys was chosen because it's always
+# available and doesn't have to be dynamically loaded.
+
+# The parser is implemented in sys._formatter_parser.
+# The "object lookup" is implemented in sys._formatter_lookup
+
+from sys import _formatter_parser, _formatter_lookup
+
+class Formatter:
+    def format(self, format_string, *args, **kwargs):
+        return self.vformat(format_string, args, kwargs)
+
+    def vformat(self, format_string, args, kwargs):
+        result = []
+        for (is_markup, literal, field_name, format_spec, conversion) in \
+                _formatter_parser(format_string):
+            if is_markup:
+                # find the object
+                index, name, obj = _formatter_lookup(field_name, args, kwargs)
+            else:
+                result.append(literal)
+        return ''.join(result)
+
+    def get_value(self, key, args, kwargs):
+        pass
+
+    def check_unused_args(self, used_args, args, kwargs):
+        pass
+
+    def format_field(self, value, format_spec):
+        pass
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index f77cf78..0560045 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -517,6 +517,32 @@
         self.assertAlmostEqual(float(Foo3(21)), 42.)
         self.assertRaises(TypeError, float, Foo4(42))
 
+    def test_format(self):
+        class A:
+            def __init__(self, x):
+                self.x = x
+            def __format__(self, format_spec):
+                return str(self.x) + format_spec
+
+        # class that returns a bad type from __format__
+        class H:
+            def __format__(self, format_spec):
+                return 1.0
+
+        self.assertEqual(format(3, ''), '3')
+        self.assertEqual(format(A(3), 'spec'), '3spec')
+
+        # for builtin types, format(x, "") == str(x)
+        self.assertEqual(format(17**13, ""), str(17**13))
+        self.assertEqual(format(1.0, ""), str(1.0))
+        self.assertEqual(format(3.1415e104, ""), str(3.1415e104))
+        self.assertEqual(format(-3.1415e104, ""), str(-3.1415e104))
+        self.assertEqual(format(3.1415e-104, ""), str(3.1415e-104))
+        self.assertEqual(format(-3.1415e-104, ""), str(-3.1415e-104))
+        self.assertEqual(format(object, ""), str(object))
+
+        #self.assertRaises(TypeError, format, H(), "")
+
     def test_getattr(self):
         import sys
         self.assert_(getattr(sys, 'stdout') is sys.stdout)
diff --git a/Lib/test/test_descrtut.py b/Lib/test/test_descrtut.py
index fe29f34..d2f9720 100644
--- a/Lib/test/test_descrtut.py
+++ b/Lib/test/test_descrtut.py
@@ -173,6 +173,7 @@
      '__delslice__',
      '__doc__',
      '__eq__',
+     '__format__',
      '__ge__',
      '__getattribute__',
      '__getitem__',
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
index 48abec9..e5a4537 100644
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -114,12 +114,44 @@
             self.assertEquals(pos_pos(), neg_pos())
             self.assertEquals(pos_neg(), neg_neg())
 
+class FormatTestCase(unittest.TestCase):
+    def testFormat(self):
+        # these should be rewritten to use both format(x, spec) and
+        # x.__format__(spec)
+
+        self.assertEqual(format(0.0, 'f'), '0.000000')
+
+        # the default is 'g', except for empty format spec
+        self.assertEqual(format(0.0, ''), '0.0')
+        self.assertEqual(format(0.01, ''), '0.01')
+        self.assertEqual(format(0.01, 'g'), '0.01')
+
+        self.assertEqual(format(0, 'f'), '0.000000')
+
+        self.assertEqual(format(1.0, 'f'), '1.000000')
+        self.assertEqual(format(1, 'f'), '1.000000')
+
+        self.assertEqual(format(-1.0, 'f'), '-1.000000')
+        self.assertEqual(format(-1, 'f'), '-1.000000')
+
+        self.assertEqual(format( 1.0, ' f'), ' 1.000000')
+        self.assertEqual(format(-1.0, ' f'), '-1.000000')
+        self.assertEqual(format( 1.0, '+f'), '+1.000000')
+        self.assertEqual(format(-1.0, '+f'), '-1.000000')
+
+        # % formatting
+        self.assertEqual(format(-1.0, '%'), '-100.000000%')
+
+        # conversion to string should fail
+        self.assertRaises(ValueError, format, 3.0, "s")
+
 
 def test_main():
     test_support.run_unittest(
         FormatFunctionsTestCase,
         UnknownFormatTestCase,
-        IEEEFormatTestCase)
+        IEEEFormatTestCase,
+        FormatTestCase)
 
 if __name__ == '__main__':
     test_main()
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py
index 0b67c3e..4e15340 100644
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -493,6 +493,50 @@
                 eq(x > y, Rcmp > 0, Frm("%r > %r %d", x, y, Rcmp))
                 eq(x >= y, Rcmp >= 0, Frm("%r >= %r %d", x, y, Rcmp))
 
+    def test_format(self):
+        self.assertEqual(format(123456789, 'd'), '123456789')
+        self.assertEqual(format(123456789, 'd'), '123456789')
+
+        # hex
+        self.assertEqual(format(3, "x"), "3")
+        self.assertEqual(format(3, "X"), "3")
+        self.assertEqual(format(1234, "x"), "4d2")
+        self.assertEqual(format(-1234, "x"), "-4d2")
+        self.assertEqual(format(1234, "8x"), "     4d2")
+# XXX fix       self.assertEqual(format(-1234, "8x"), "    -4d2")
+        self.assertEqual(format(1234, "x"), "4d2")
+        self.assertEqual(format(-1234, "x"), "-4d2")
+        self.assertEqual(format(-3, "x"), "-3")
+        self.assertEqual(format(-3, "X"), "-3")
+        self.assertEqual(format(int('be', 16), "x"), "be")
+        self.assertEqual(format(int('be', 16), "X"), "BE")
+        self.assertEqual(format(-int('be', 16), "x"), "-be")
+        self.assertEqual(format(-int('be', 16), "X"), "-BE")
+
+        # octal
+        self.assertEqual(format(3, "b"), "11")
+        self.assertEqual(format(-3, "b"), "-11")
+        self.assertEqual(format(1234, "b"), "10011010010")
+        self.assertEqual(format(-1234, "b"), "-10011010010")
+        self.assertEqual(format(1234, "-b"), "10011010010")
+        self.assertEqual(format(-1234, "-b"), "-10011010010")
+        self.assertEqual(format(1234, " b"), " 10011010010")
+        self.assertEqual(format(-1234, " b"), "-10011010010")
+        self.assertEqual(format(1234, "+b"), "+10011010010")
+        self.assertEqual(format(-1234, "+b"), "-10011010010")
+
+        # conversion to float
+        self.assertEqual(format(0, 'f'), '0.000000')
+
+        # make sure these are errors
+        self.assertRaises(ValueError, format, 3, "1.3")  # precision disallowed
+        return
+        self.assertRaises(ValueError, format, 3, "+c")   # sign not allowed
+                                                         # with 'c'
+        self.assertRaises(ValueError, format, 3, "R")    # bogus format type
+        # conversion to string should fail
+        self.assertRaises(ValueError, format, 3, "s")
+
 def test_main():
     test_support.run_unittest(LongTest)
 
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py
index 3b21ebc..ce9fe23 100644
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -15,6 +15,14 @@
         string.punctuation
         string.printable
 
+    def test_formatter(self):
+        fmt = string.Formatter()
+        self.assertEqual(fmt.format("foo"), "foo")
+
+        # Formatter not working you for lookups
+        #self.assertEqual(fmt.format("foo{0}", "bar"), "foobar")
+
+
     def test_maketrans(self):
         transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
 
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 4b582de..ff0e015 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -357,6 +357,218 @@
 
         self.assertRaises(TypeError, "abc".__contains__)
 
+    def test_format(self):
+        self.assertEqual(''.format(), '')
+        self.assertEqual('a'.format(), 'a')
+        self.assertEqual('ab'.format(), 'ab')
+        self.assertEqual('a{{'.format(), 'a{')
+        self.assertEqual('a}}'.format(), 'a}')
+        self.assertEqual('{{b'.format(), '{b')
+        self.assertEqual('}}b'.format(), '}b')
+        self.assertEqual('a{{b'.format(), 'a{b')
+
+        # examples from the PEP:
+        import datetime
+        self.assertEqual("My name is {0}".format('Fred'), "My name is Fred")
+        self.assertEqual("My name is {0[name]}".format(dict(name='Fred')),
+                         "My name is Fred")
+        self.assertEqual("My name is {0} :-{{}}".format('Fred'),
+                         "My name is Fred :-{}")
+
+        d = datetime.date(2007, 8, 18)
+        self.assertEqual("The year is {0.year}".format(d),
+                         "The year is 2007")
+
+        #"{0!r:20}".format("Hello")
+
+        # classes we'll use for testing
+        class C:
+            def __init__(self, x=100):
+                self._x = x
+            def __format__(self, spec):
+                return spec
+
+        class D:
+            def __init__(self, x):
+                self.x = x
+            def __format__(self, spec):
+                return str(self.x)
+
+        # class with __str__, but no __format__
+        class E:
+            def __init__(self, x):
+                self.x = x
+            def __str__(self):
+                return 'E(' + self.x + ')'
+
+        # class with __repr__, but no __format__ or __str__
+        class F:
+            def __init__(self, x):
+                self.x = x
+            def __repr__(self):
+                return 'F(' + self.x + ')'
+
+        # class with __format__ that forwards to string, for some format_spec's
+        class G:
+            def __init__(self, x):
+                self.x = x
+            def __str__(self):
+                return "string is " + self.x
+            def __format__(self, format_spec):
+                if format_spec == 'd':
+                    return 'G(' + self.x + ')'
+                return object.__format__(self, format_spec)
+
+        # class that returns a bad type from __format__
+        class H:
+            def __format__(self, format_spec):
+                return 1.0
+
+
+        self.assertEqual(''.format(), '')
+        self.assertEqual('abc'.format(), 'abc')
+        self.assertEqual('{0}'.format('abc'), 'abc')
+        self.assertEqual('{0:}'.format('abc'), 'abc')
+#        self.assertEqual('{ 0 }'.format('abc'), 'abc')
+        self.assertEqual('X{0}'.format('abc'), 'Xabc')
+        self.assertEqual('{0}X'.format('abc'), 'abcX')
+        self.assertEqual('X{0}Y'.format('abc'), 'XabcY')
+        self.assertEqual('{1}'.format(1, 'abc'), 'abc')
+        self.assertEqual('X{1}'.format(1, 'abc'), 'Xabc')
+        self.assertEqual('{1}X'.format(1, 'abc'), 'abcX')
+        self.assertEqual('X{1}Y'.format(1, 'abc'), 'XabcY')
+        self.assertEqual('{0}'.format(-15), '-15')
+        self.assertEqual('{0}{1}'.format(-15, 'abc'), '-15abc')
+        self.assertEqual('{0}X{1}'.format(-15, 'abc'), '-15Xabc')
+        self.assertEqual('{{'.format(), '{')
+        self.assertEqual('}}'.format(), '}')
+        self.assertEqual('{{}}'.format(), '{}')
+        self.assertEqual('{{x}}'.format(), '{x}')
+        self.assertEqual('{{{0}}}'.format(123), '{123}')
+        self.assertEqual('{{{{0}}}}'.format(), '{{0}}')
+        self.assertEqual('}}{{'.format(), '}{')
+        self.assertEqual('}}x{{'.format(), '}x{')
+
+        self.assertEqual('{foo._x}'.format(foo=C(20)), '20')
+        self.assertEqual('{1}{0}'.format(D(10), D(20)), '2010')
+        self.assertEqual('{0._x.x}'.format(C(D('abc'))), 'abc')
+        self.assertEqual('{0[0]}'.format(['abc', 'def']), 'abc')
+        self.assertEqual('{0[1]}'.format(['abc', 'def']), 'def')
+        self.assertEqual('{0[1][0]}'.format(['abc', ['def']]), 'def')
+        self.assertEqual('{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
+
+        # I'm not sure if this should work, or if it's a problem if it does work
+        #'{0[_{foo}]}'.format({'_FOO': 'abc'}, foo='FOO')
+        #('{0[{foo}{bar}]}'.format({'FOOBAR': 'abc'}, foo='FOO', bar='BAR')
+
+        # format specifiers for built in types
+
+        # strings
+        self.assertEqual('{0:.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:.3s}'.format('ab'), 'ab')
+        self.assertEqual('{0:.3s}'.format('abcdef'), 'abc')
+        self.assertEqual('{0:.0s}'.format('abcdef'), '')
+        self.assertEqual('{0:3.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:2.3s}'.format('abc'), 'abc')
+        self.assertEqual('{0:2.2s}'.format('abc'), 'ab')
+        self.assertEqual('{0:3.2s}'.format('abc'), 'ab ')
+        self.assertEqual('{0:x<0s}'.format('result'), 'result')
+        self.assertEqual('{0:x<5s}'.format('result'), 'result')
+        self.assertEqual('{0:x<6s}'.format('result'), 'result')
+        self.assertEqual('{0:x<7s}'.format('result'), 'resultx')
+        self.assertEqual('{0:x<8s}'.format('result'), 'resultxx')
+        self.assertEqual('{0: <7s}'.format('result'), 'result ')
+        self.assertEqual('{0:<7s}'.format('result'), 'result ')
+        self.assertEqual('{0:>7s}'.format('result'), ' result')
+        self.assertEqual('{0:>8s}'.format('result'), '  result')
+        self.assertEqual('{0:^8s}'.format('result'), ' result ')
+        self.assertEqual('{0:^9s}'.format('result'), ' result  ')
+        self.assertEqual('{0:^10s}'.format('result'), '  result  ')
+        self.assertEqual('{0:10000}'.format('a'), 'a' + ' ' * 9999)
+        self.assertEqual('{0:10000}'.format(''), ' ' * 10000)
+        self.assertEqual('{0:10000000}'.format(''), ' ' * 10000000)
+
+        # format specifiers for user defined type
+        self.assertEqual('{0:abc}'.format(C()), 'abc')
+
+        # !r and !s coersions
+        self.assertEqual('{0!s}'.format('Hello'), 'Hello')
+        self.assertEqual('{0!s:}'.format('Hello'), 'Hello')
+        self.assertEqual('{0!s:15}'.format('Hello'), 'Hello          ')
+        self.assertEqual('{0!s:15s}'.format('Hello'), 'Hello          ')
+        self.assertEqual('{0!r}'.format('Hello'), "'Hello'")
+        self.assertEqual('{0!r:}'.format('Hello'), "'Hello'")
+        self.assertEqual('{0!r}'.format(F('Hello')), 'F(Hello)')
+
+        # XXX should pass, but currently don't
+        # format(object, "")
+
+        # test fallback to object.__format__
+        self.assertEqual('{0}'.format({}), '{}')
+        self.assertEqual('{0}'.format([]), '[]')
+        self.assertEqual('{0}'.format([1]), '[1]')
+        self.assertEqual('{0}'.format(E('data')), 'E(data)')
+        self.assertEqual('{0:^10}'.format(E('data')), ' E(data)  ')
+        self.assertEqual('{0:^10s}'.format(E('data')), ' E(data)  ')
+        self.assertEqual('{0:d}'.format(G('data')), 'G(data)')
+        self.assertEqual('{0:>15s}'.format(G('data')), ' string is data')
+        self.assertEqual('{0!s}'.format(G('data')), 'string is data')
+
+        # string format specifiers
+        self.assertEqual('{0:}'.format('a'), 'a')
+
+        # computed format specifiers
+        self.assertEqual("{0:.{1}}".format('hello world', 5), 'hello')
+        self.assertEqual("{0:.{1}s}".format('hello world', 5), 'hello')
+        self.assertEqual("{0:.{precision}s}".format('hello world', precision=5), 'hello')
+        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello     ')
+        self.assertEqual("{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello     ')
+
+        # test various errors
+        self.assertRaises(ValueError, '{'.format)
+        self.assertRaises(ValueError, '}'.format)
+        self.assertRaises(ValueError, 'a{'.format)
+        self.assertRaises(ValueError, 'a}'.format)
+        self.assertRaises(ValueError, '{a'.format)
+        self.assertRaises(ValueError, '}a'.format)
+        self.assertRaises(ValueError, '{0}'.format)
+        self.assertRaises(ValueError, '{1}'.format, 'abc')
+        self.assertRaises(ValueError, '{x}'.format)
+        self.assertRaises(ValueError, "}{".format)
+        self.assertRaises(ValueError, "{".format)
+        self.assertRaises(ValueError, "}".format)
+        self.assertRaises(ValueError, "abc{0:{}".format)
+        self.assertRaises(ValueError, "{0".format)
+        self.assertRaises(ValueError, "{0.[]}".format)
+        self.assertRaises(ValueError, "{0[0}".format)
+        self.assertRaises(ValueError, "{0[0:foo}".format)
+        self.assertRaises(ValueError, "{c]}".format)
+        self.assertRaises(ValueError, "{{ {{{0}}".format)
+        self.assertRaises(ValueError, "{0}}".format)
+        self.assertRaises(ValueError, "{foo}".format, bar=3)
+        self.assertRaises(ValueError, "{0!x}".format, 3)
+        self.assertRaises(ValueError, "{0!}".format)
+        self.assertRaises(ValueError, "{0!rs}".format)
+        self.assertRaises(ValueError, "{!}".format)
+        self.assertRaises(ValueError, "{:}".format)
+        self.assertRaises(ValueError, "{}".format)
+
+        # can't have a replacement on the field name portion
+        self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
+
+        # exceed maximum recursion depth
+        self.assertRaises(ValueError, "{0:{1:{2}}}".format, 'abc', 's', '')
+        self.assertRaises(ValueError, "{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
+                          0, 1, 2, 3, 4, 5, 6, 7)
+
+        # string format spec errors
+        self.assertRaises(ValueError, "{0:-s}".format, '')
+        self.assertRaises(ValueError, format, "", "-")
+        self.assertRaises(ValueError, "{0:=s}".format, '')
+
+        # check that __format__ returns a string
+        #self.assertRaises(TypeError, "{0}".format, H())
+
     def test_formatting(self):
         string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
         # Testing Unicode formatting strings...