Issue #14744: Use the new _PyUnicodeWriter internal API to speed up str%args and str.format(args)

 * String, int, float and complex formatting now uses the _PyUnicodeWriter
   API, which avoids a temporary buffer in most cases.
 * Add _PyUnicodeWriter_WriteStr() to restore the PyAccu optimization: just
   keep a reference to the string if the output consists of a single string
 * Disable overallocation when formatting the last argument of str%args and
   str.format(args)
 * Overallocation allocates at least 100 characters: add a min_length
   attribute to the _PyUnicodeWriter structure
 * Add new private functions: _PyUnicode_FastCopyCharacters(),
   _PyUnicode_FastFill() and _PyUnicode_FromASCII()

The speedup is around 20% on average.
diff --git a/Objects/complexobject.c b/Objects/complexobject.c
index b73dc4b..403c60c 100644
--- a/Objects/complexobject.c
+++ b/Objects/complexobject.c
@@ -699,11 +699,22 @@
 complex__format__(PyObject* self, PyObject* args)
 {
     PyObject *format_spec;
+    _PyUnicodeWriter writer;
+    int ret;
 
     if (!PyArg_ParseTuple(args, "U:__format__", &format_spec))
-    return NULL;
-    return _PyComplex_FormatAdvanced(self, format_spec, 0,
-                                     PyUnicode_GET_LENGTH(format_spec));
+        return NULL;
+
+    _PyUnicodeWriter_Init(&writer, 0);
+    ret = _PyComplex_FormatAdvancedWriter(
+        &writer,
+        self,
+        format_spec, 0, PyUnicode_GET_LENGTH(format_spec));
+    if (ret == -1) {
+        _PyUnicodeWriter_Dealloc(&writer);
+        return NULL;
+    }
+    return _PyUnicodeWriter_Finish(&writer);
 }
 
 #if 0