Skip Montanaro | f2c4711 | 2003-01-01 20:26:47 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | |
"""
For each argument on the command line, look for it in the set of all Unicode
names. Arguments are treated as case-insensitive regular expressions, e.g.:

    % find-uname 'small letter a$' 'horizontal line'
    *** small letter a$ matches ***
    LATIN SMALL LETTER A (97)
    COMBINING LATIN SMALL LETTER A (867)
    CYRILLIC SMALL LETTER A (1072)
    PARENTHESIZED LATIN SMALL LETTER A (9372)
    CIRCLED LATIN SMALL LETTER A (9424)
    FULLWIDTH LATIN SMALL LETTER A (65345)
    *** horizontal line matches ***
    HORIZONTAL LINE EXTENSION (9135)
"""
| 18 | |
| 19 | import unicodedata |
| 20 | import sys |
| 21 | import re |
| 22 | |
def main(args):
    """Print every Unicode character name matched by each pattern in *args*.

    Each element of *args* is compiled as a case-insensitive regular
    expression and searched for (``re.search``, so the pattern is unanchored
    unless it anchors itself) in the name of every code point from 0 through
    ``sys.maxunicode``.  For each pattern with at least one hit, a
    ``*** <pattern> matches ***`` header is printed, followed by one
    ``NAME (decimal code point)`` line per match, in code point order.
    Patterns that match nothing produce no output.

    Raises:
        re.error: if an argument is not a valid regular expression.
    """
    # Build the (code point, name) table once and reuse it for every pattern.
    # Passing a default to unicodedata.name() makes it return None for an
    # unnamed code point instead of raising ValueError, so no exception has
    # to be raised and swallowed for each unnamed code point.
    unicode_names = []
    for code_point in range(sys.maxunicode + 1):
        name = unicodedata.name(chr(code_point), None)
        if name is not None:
            unicode_names.append((code_point, name))

    for arg in args:
        # Compile lazily, one pattern at a time, so output for earlier
        # patterns is still printed before a later invalid pattern raises.
        search = re.compile(arg, re.I).search
        matches = [(code_point, name) for (code_point, name) in unicode_names
                   if search(name) is not None]
        if matches:
            print("***", arg, "matches", "***")
            for (code_point, name) in matches:
                print("%s (%d)" % (name, code_point))
Skip Montanaro | f2c4711 | 2003-01-01 20:26:47 +0000 | [diff] [blame] | 38 | |
if __name__ == "__main__":
    # Script entry point: drop argv[0] (the program name) and hand every
    # remaining command-line argument to main() as a pattern.
    patterns = sys.argv[1:]
    main(patterns)