SF #989185: Drop unicode.iswide() and unicode.width() and add unicodedata.east_asian_width(). You can still implement your own simple width() function using it like this:

    def width(u):
        w = 0
        for c in unicodedata.normalize('NFC', u):
            cwidth = unicodedata.east_asian_width(c)
            if cwidth in ('W', 'F'):
                w += 2
            else:
                w += 1
        return w

commit: e9ddfbb41207328d5c89061067f3431e00711fda [log] [tgz]
author: Hye-Shik Chang <hyeshik@gmail.com> Wed Aug 04 07:38:35 2004 +0000
committer: Hye-Shik Chang <hyeshik@gmail.com> Wed Aug 04 07:38:35 2004 +0000
tree: 54093161fe6808de7d6fcc3304eb32241231f010
parent: b5047fd01948ab108edcc1b3c2c901d915814cfd [diff] [blame]
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 6c29fd1..7186780 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py

@@ -43,6 +43,8 @@
     "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
     "ON" ]
 
+EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]
+
 # note: should match definitions in Objects/unicodectype.c
 ALPHA_MASK = 0x01
 DECIMAL_MASK = 0x02
@@ -52,7 +54,6 @@
 SPACE_MASK = 0x20
 TITLE_MASK = 0x40
 UPPER_MASK = 0x80
-WIDE_MASK = 0x100
 
 def maketables(trace=0):
 
@@ -72,7 +73,7 @@
 
 def makeunicodedata(unicode, trace):
 
-    dummy = (0, 0, 0, 0)
+    dummy = (0, 0, 0, 0, 0)
     table = [dummy]
     cache = {0: dummy}
     index = [0] * len(unicode.chars)
@@ -91,8 +92,9 @@
             combining = int(record[3])
             bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
             mirrored = record[9] == "Y"
+            eastasianwidth = EASTASIANWIDTH_NAMES.index(record[15])
             item = (
-                category, combining, bidirectional, mirrored
+                category, combining, bidirectional, mirrored, eastasianwidth
                 )
             # add entry to index and item tables
             i = cache.get(item)
@@ -204,7 +206,7 @@
     print >>fp, \
           "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
     for item in table:
-        print >>fp, "    {%d, %d, %d, %d}," % item
+        print >>fp, "    {%d, %d, %d, %d, %d}," % item
     print >>fp, "};"
     print >>fp
 
@@ -239,6 +241,12 @@
     print >>fp, "    NULL"
     print >>fp, "};"
 
+    print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {"
+    for name in EASTASIANWIDTH_NAMES:
+        print >>fp, "    \"%s\"," % name
+    print >>fp, "    NULL"
+    print >>fp, "};"
+
     print >>fp, "static const char *decomp_prefix[] = {"
     for name in decomp_prefix:
         print >>fp, "    \"%s\"," % name
@@ -334,8 +342,6 @@
             if record[7]:
                 flags |= DIGIT_MASK
                 digit = int(record[7])
-            if record[15] in ('W', 'F'): # Wide or Full width
-                flags |= WIDE_MASK
             item = (
                 upper, lower, title, decimal, digit, flags
                 )
commit	e9ddfbb41207328d5c89061067f3431e00711fda	[log] [tgz]
author	Hye-Shik Chang <hyeshik@gmail.com>	Wed Aug 04 07:38:35 2004 +0000
committer	Hye-Shik Chang <hyeshik@gmail.com>	Wed Aug 04 07:38:35 2004 +0000
tree	54093161fe6808de7d6fcc3304eb32241231f010
parent	b5047fd01948ab108edcc1b3c2c901d915814cfd [diff] [blame]