| #! /usr/bin/env python3 |
| # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> |
| |
| """Generate binary message catalog from textual translation description. |
| |
| This program converts a textual Uniforum-style message catalog (.po file) into |
| a binary GNU catalog (.mo file). This is essentially the same function as the |
| GNU msgfmt program, however, it is a simpler implementation. |
| |
| Usage: msgfmt.py [OPTIONS] filename.po |
| |
| Options: |
| -o file |
| --output-file=file |
| Specify the output file to write to. If omitted, output will go to a |
| file named filename.mo (based off the input file name). |
| |
| -h |
| --help |
| Print this message and exit. |
| |
| -V |
| --version |
| Display version information and exit. |
| """ |
| |
| import sys |
| import os |
| import getopt |
| import struct |
| import array |
| from email.parser import HeaderParser |
| |
| __version__ = "1.1" |
| |
| MESSAGES = {} |
| |
| |
| |
| def usage(code, msg=''): |
| print(__doc__, file=sys.stderr) |
| if msg: |
| print(msg, file=sys.stderr) |
| sys.exit(code) |
| |
| |
| |
| def add(id, str, fuzzy): |
| "Add a non-fuzzy translation to the dictionary." |
| global MESSAGES |
| if not fuzzy and str: |
| MESSAGES[id] = str |
| |
| |
| |
| def generate(): |
| "Return the generated output." |
| global MESSAGES |
| # the keys are sorted in the .mo file |
| keys = sorted(MESSAGES.keys()) |
| offsets = [] |
| ids = strs = b'' |
| for id in keys: |
| # For each string, we need size and file offset. Each string is NUL |
| # terminated; the NUL does not count into the size. |
| offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) |
| ids += id + b'\0' |
| strs += MESSAGES[id] + b'\0' |
| output = '' |
| # The header is 7 32-bit unsigned integers. We don't use hash tables, so |
| # the keys start right after the index tables. |
| # translated string. |
| keystart = 7*4+16*len(keys) |
| # and the values start after the keys |
| valuestart = keystart + len(ids) |
| koffsets = [] |
| voffsets = [] |
| # The string table first has the list of keys, then the list of values. |
| # Each entry has first the size of the string, then the file offset. |
| for o1, l1, o2, l2 in offsets: |
| koffsets += [l1, o1+keystart] |
| voffsets += [l2, o2+valuestart] |
| offsets = koffsets + voffsets |
| output = struct.pack("Iiiiiii", |
| 0x950412de, # Magic |
| 0, # Version |
| len(keys), # # of entries |
| 7*4, # start of key index |
| 7*4+len(keys)*8, # start of value index |
| 0, 0) # size and offset of hash table |
| output += array.array("i", offsets).tostring() |
| output += ids |
| output += strs |
| return output |
| |
| |
| |
| def make(filename, outfile): |
| ID = 1 |
| STR = 2 |
| |
| # Compute .mo name from .po name and arguments |
| if filename.endswith('.po'): |
| infile = filename |
| else: |
| infile = filename + '.po' |
| if outfile is None: |
| outfile = os.path.splitext(infile)[0] + '.mo' |
| |
| try: |
| lines = open(infile, 'rb').readlines() |
| except IOError as msg: |
| print(msg, file=sys.stderr) |
| sys.exit(1) |
| |
| section = None |
| fuzzy = 0 |
| |
| # Start off assuming Latin-1, so everything decodes without failure, |
| # until we know the exact encoding |
| encoding = 'latin-1' |
| |
| # Parse the catalog |
| lno = 0 |
| for l in lines: |
| l = l.decode(encoding) |
| lno += 1 |
| # If we get a comment line after a msgstr, this is a new entry |
| if l[0] == '#' and section == STR: |
| add(msgid, msgstr, fuzzy) |
| section = None |
| fuzzy = 0 |
| # Record a fuzzy mark |
| if l[:2] == '#,' and 'fuzzy' in l: |
| fuzzy = 1 |
| # Skip comments |
| if l[0] == '#': |
| continue |
| # Now we are in a msgid section, output previous section |
| if l.startswith('msgid') and not l.startswith('msgid_plural'): |
| if section == STR: |
| add(msgid, msgstr, fuzzy) |
| if not msgid: |
| # See whether there is an encoding declaration |
| p = HeaderParser() |
| charset = p.parsestr(msgstr.decode(encoding)).get_content_charset() |
| if charset: |
| encoding = charset |
| section = ID |
| l = l[5:] |
| msgid = msgstr = b'' |
| is_plural = False |
| # This is a message with plural forms |
| elif l.startswith('msgid_plural'): |
| if section != ID: |
| print('msgid_plural not preceeded by msgid on %s:%d' % (infile, lno), |
| file=sys.stderr) |
| sys.exit(1) |
| l = l[12:] |
| msgid += b'\0' # separator of singular and plural |
| is_plural = True |
| # Now we are in a msgstr section |
| elif l.startswith('msgstr'): |
| section = STR |
| if l.startswith('msgstr['): |
| if not is_plural: |
| print('plural without msgid_plural on %s:%d' % (infile, lno), |
| file=sys.stderr) |
| sys.exit(1) |
| l = l.split(']', 1)[1] |
| if msgstr: |
| msgstr += b'\0' # Separator of the various plural forms |
| else: |
| if is_plural: |
| print('indexed msgstr required for plural on %s:%d' % (infile, lno), |
| file=sys.stderr) |
| sys.exit(1) |
| l = l[6:] |
| # Skip empty lines |
| l = l.strip() |
| if not l: |
| continue |
| # XXX: Does this always follow Python escape semantics? |
| l = eval(l) |
| if section == ID: |
| msgid += l.encode(encoding) |
| elif section == STR: |
| msgstr += l.encode(encoding) |
| else: |
| print('Syntax error on %s:%d' % (infile, lno), \ |
| 'before:', file=sys.stderr) |
| print(l, file=sys.stderr) |
| sys.exit(1) |
| # Add last entry |
| if section == STR: |
| add(msgid, msgstr, fuzzy) |
| |
| # Compute output |
| output = generate() |
| |
| try: |
| open(outfile,"wb").write(output) |
| except IOError as msg: |
| print(msg, file=sys.stderr) |
| |
| |
| |
| def main(): |
| try: |
| opts, args = getopt.getopt(sys.argv[1:], 'hVo:', |
| ['help', 'version', 'output-file=']) |
| except getopt.error as msg: |
| usage(1, msg) |
| |
| outfile = None |
| # parse options |
| for opt, arg in opts: |
| if opt in ('-h', '--help'): |
| usage(0) |
| elif opt in ('-V', '--version'): |
| print("msgfmt.py", __version__, file=sys.stderr) |
| sys.exit(0) |
| elif opt in ('-o', '--output-file'): |
| outfile = arg |
| # do it |
| if not args: |
| print('No input file given', file=sys.stderr) |
| print("Try `msgfmt --help' for more information.", file=sys.stderr) |
| return |
| |
| for filename in args: |
| make(filename, outfile) |
| |
| |
| if __name__ == '__main__': |
| main() |