| # |
| # genmap_support.py: Multibyte Codec Map Generator |
| # |
| # Original Author: Hye-Shik Chang <perky@FreeBSD.org> |
| # Modified Author: Dong-hee Na <donghee.na92@gmail.com> |
| # |
| |
| |
| class BufferedFiller: |
| def __init__(self, column=78): |
| self.column = column |
| self.buffered = [] |
| self.cline = [] |
| self.clen = 0 |
| self.count = 0 |
| |
| def write(self, *data): |
| for s in data: |
| if len(s) > self.column: |
| raise ValueError("token is too long") |
| if len(s) + self.clen > self.column: |
| self.flush() |
| self.clen += len(s) |
| self.cline.append(s) |
| self.count += 1 |
| |
| def flush(self): |
| if not self.cline: |
| return |
| self.buffered.append(''.join(self.cline)) |
| self.clen = 0 |
| del self.cline[:] |
| |
| def printout(self, fp): |
| self.flush() |
| for l in self.buffered: |
| fp.write(f'{l}\n') |
| del self.buffered[:] |
| |
| def __len__(self): |
| return self.count |
| |
| |
| class DecodeMapWriter: |
| filler_class = BufferedFiller |
| |
| def __init__(self, fp, prefix, decode_map): |
| self.fp = fp |
| self.prefix = prefix |
| self.decode_map = decode_map |
| self.filler = self.filler_class() |
| |
| def update_decode_map(self, c1range, c2range, onlymask=(), wide=0): |
| c2values = range(c2range[0], c2range[1] + 1) |
| |
| for c1 in range(c1range[0], c1range[1] + 1): |
| if c1 not in self.decode_map or (onlymask and c1 not in onlymask): |
| continue |
| c2map = self.decode_map[c1] |
| rc2values = [n for n in c2values if n in c2map] |
| if not rc2values: |
| continue |
| |
| c2map[self.prefix] = True |
| c2map['min'] = rc2values[0] |
| c2map['max'] = rc2values[-1] |
| c2map['midx'] = len(self.filler) |
| |
| for v in range(rc2values[0], rc2values[-1] + 1): |
| if v in c2map: |
| self.filler.write('%d,' % c2map[v]) |
| else: |
| self.filler.write('U,') |
| |
| def generate(self, wide=False): |
| if not wide: |
| self.fp.write(f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n") |
| else: |
| self.fp.write(f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n") |
| |
| self.filler.printout(self.fp) |
| self.fp.write("};\n\n") |
| |
| if not wide: |
| self.fp.write(f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n") |
| else: |
| self.fp.write(f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n") |
| |
| for i in range(256): |
| if i in self.decode_map and self.prefix in self.decode_map[i]: |
| m = self.decode_map |
| prefix = self.prefix |
| else: |
| self.filler.write("{", "0,", "0,", "0", "},") |
| continue |
| |
| self.filler.write("{", "__%s_decmap" % prefix, "+", "%d" % m[i]['midx'], |
| ",", "%d," % m[i]['min'], "%d" % m[i]['max'], "},") |
| self.filler.printout(self.fp) |
| self.fp.write("};\n\n") |
| |
| |
| class EncodeMapWriter: |
| filler_class = BufferedFiller |
| elemtype = 'DBCHAR' |
| indextype = 'struct unim_index' |
| |
| def __init__(self, fp, prefix, encode_map): |
| self.fp = fp |
| self.prefix = prefix |
| self.encode_map = encode_map |
| self.filler = self.filler_class() |
| |
| def generate(self): |
| self.buildmap() |
| self.printmap() |
| |
| def buildmap(self): |
| for c1 in range(0, 256): |
| if c1 not in self.encode_map: |
| continue |
| c2map = self.encode_map[c1] |
| rc2values = [k for k in c2map.keys()] |
| rc2values.sort() |
| if not rc2values: |
| continue |
| |
| c2map[self.prefix] = True |
| c2map['min'] = rc2values[0] |
| c2map['max'] = rc2values[-1] |
| c2map['midx'] = len(self.filler) |
| |
| for v in range(rc2values[0], rc2values[-1] + 1): |
| if v not in c2map: |
| self.write_nochar() |
| elif isinstance(c2map[v], int): |
| self.write_char(c2map[v]) |
| elif isinstance(c2map[v], tuple): |
| self.write_multic(c2map[v]) |
| else: |
| raise ValueError |
| |
| def write_nochar(self): |
| self.filler.write('N,') |
| |
| def write_multic(self, point): |
| self.filler.write('M,') |
| |
| def write_char(self, point): |
| self.filler.write(str(point) + ',') |
| |
| def printmap(self): |
| self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n") |
| self.filler.printout(self.fp) |
| self.fp.write("};\n\n") |
| self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n") |
| |
| for i in range(256): |
| if i in self.encode_map and self.prefix in self.encode_map[i]: |
| self.filler.write("{", "__%s_encmap" % self.prefix, "+", |
| "%d" % self.encode_map[i]['midx'], ",", |
| "%d," % self.encode_map[i]['min'], |
| "%d" % self.encode_map[i]['max'], "},") |
| else: |
| self.filler.write("{", "0,", "0,", "0", "},") |
| continue |
| self.filler.printout(self.fp) |
| self.fp.write("};\n\n") |
| |
| |
| def open_mapping_file(path, source): |
| try: |
| f = open(path) |
| except IOError: |
| raise SystemExit(f'{source} is needed') |
| return f |
| |
| |
| def print_autogen(fo, source): |
| fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n') |
| |
| |
| def loadmap(fo, natcol=0, unicol=1, sbcs=0): |
| print("Loading from", fo) |
| fo.seek(0, 0) |
| decmap = {} |
| for line in fo: |
| line = line.split('#', 1)[0].strip() |
| if not line or len(line.split()) < 2: |
| continue |
| |
| row = [eval(e) for e in line.split()] |
| loc, uni = row[natcol], row[unicol] |
| if loc >= 0x100 or sbcs: |
| decmap.setdefault((loc >> 8), {}) |
| decmap[(loc >> 8)][(loc & 0xff)] = uni |
| |
| return decmap |