Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 1 | """sstruct.py -- SuperStruct |
| 2 | |
| 3 | Higher level layer on top of the struct module, making it possible to |
| 4 | bind names to struct elements. The interface is similar to |
| 5 | struct, except the objects passed and returned are not tuples |
| 6 | (or argument lists), but dictionaries or instances. |
| 7 | |
| 8 | Just like struct, we use format strings to describe a data |
| 9 | structure, except we use one line per element. Lines are |
| 10 | separated by newlines or semi-colons. Each line contains |
| 11 | either one of the special struct characters ('@', '=', '<', |
| 12 | '>' or '!') or a 'name:formatchar' combo (eg. 'myFloat:f'). |
| 13 | Repetitions, as offered by the struct module, are not useful |
| 14 | in this context, except for fixed length strings (eg. 'myInt:5h' |
| 15 | is not allowed but 'myString:5s' is). The 'x' format character |
| 16 | (pad byte) is treated as 'special', since it is by definition |
| 17 | anonymous. Extra whitespace is allowed everywhere. |
| 18 | |
| 19 | The sstruct module offers one feature that the "normal" struct |
| 20 | module doesn't: support for fixed point numbers. These are spelled |
| 21 | as "n.mF", where n is the number of bits before the point, and m |
| 22 | the number of bits after the point. Fixed point numbers get |
| 23 | converted to floats. |
| 24 | |
| 25 | pack(format, object): |
| 26 | 'object' is either a dictionary or an instance (or actually |
| 27 | anything that has a __dict__ attribute). If it is a dictionary, |
| 28 | its keys are used for names. If it is an instance, its |
| 29 | attributes are used to grab struct elements from. Returns |
| 30 | a string containing the data. |
| 31 | |
| 32 | unpack(format, data, object=None) |
| 33 | If 'object' is omitted (or None), a new dictionary will be |
| 34 | returned. If 'object' is a dictionary, it will be used to add |
| 35 | struct elements to. If it is an instance (or in fact anything |
| 36 | that has a __dict__ attribute), an attribute will be added for |
| 37 | each struct element. In the latter two cases, 'object' itself |
| 38 | is returned. |
| 39 | |
| 40 | unpack2(format, data, object=None) |
| 41 | Convenience function. Same as unpack, except data may be longer |
| 42 | than needed. The returned value is a tuple: (object, leftoverdata). |
| 43 | |
| 44 | calcsize(format) |
| 45 | like struct.calcsize(), but uses our own format strings: |
| 46 | it returns the size of the data in bytes. |
| 47 | """ |
| 48 | |
| 49 | # XXX I would like to support pascal strings, too, but I'm not |
| 50 | # sure if that's wise. Would be nice if struct supported them |
| 51 | # "properly", but that would certainly break calcsize()... |
| 52 | |
# Module metadata: release version string and copyright notice.
__version__ = "1.2"
__copyright__ = "Copyright 1998, Just van Rossum <just@letterror.com>"
| 55 | |
| 56 | import struct |
| 57 | import re |
Behdad Esfahbod | 7ed91ec | 2013-11-27 15:16:28 -0500 | [diff] [blame^] | 58 | from .py23 import * |
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 59 | |
| 60 | |
class Error(Exception):
    """Raised when a format description passed to this module is malformed."""


# 'error' is the historical public name. It used to be the plain string
# "sstruct.error", which can neither be called (raise error("...")) nor
# raised, so it is now an alias for a real exception class.
error = Error
| 62 | |
def pack(format, object):
    """Pack the named values of 'object' according to 'format'.

    'object' is either a dictionary or anything with a __dict__ attribute;
    one value is looked up for every name declared in 'format'. Values of
    fixed point elements are scaled and rounded to integers first. Returns
    the result of struct.pack().
    """
    formatstring, names, fixes = getformat(format)
    source = object if isinstance(object, dict) else object.__dict__
    values = []
    for name in names:
        item = source[name]
        if name in fixes:
            # fixed point conversion: float -> scaled integer
            item = int(round(item * fixes[name]))
        values.append(item)
    return struct.pack(formatstring, *values)
| 76 | |
def unpack(format, data, object=None):
    """Unpack 'data' according to 'format' and store the values by name.

    If 'object' is None a new dictionary is created and returned. If it is
    a dictionary the values are stored as items; otherwise they are stored
    in object.__dict__. In all cases 'object' itself is returned. Values of
    fixed point elements are converted back to floats.
    """
    if object is None:
        object = {}
    formatstring, names, fixes = getformat(format)
    target = object if isinstance(object, dict) else object.__dict__
    for name, raw in zip(names, struct.unpack(formatstring, data)):
        if name in fixes:
            # fixed point conversion: scaled integer -> float
            raw = raw / fixes[name]
        target[name] = raw
    return object
| 94 | |
def unpack2(format, data, object=None):
    """Like unpack(), but 'data' may be longer than 'format' requires.

    Returns a tuple (object, leftoverdata), where 'leftoverdata' is the
    part of 'data' that follows the bytes consumed by 'format'.
    """
    size = calcsize(format)
    head, leftover = data[:size], data[size:]
    return unpack(format, head, object), leftover
| 98 | |
def calcsize(format):
    """Return the size in bytes of the data described by 'format'.

    Works like struct.calcsize(), but takes this module's format strings.
    """
    formatstring = getformat(format)[0]
    return struct.calcsize(formatstring)
| 102 | |
| 103 | |
| 104 | # matches "name:formatchar" (whitespace is allowed) |
| 105 | _elementRE = re.compile( |
| 106 | "\s*" # whitespace |
| 107 | "([A-Za-z_][A-Za-z_0-9]*)" # name (python identifier) |
| 108 | "\s*:\s*" # whitespace : whitespace |
| 109 | "([cbBhHiIlLfd]|[0-9]+[ps]|" # formatchar... |
| 110 | "([0-9]+)\.([0-9]+)(F))" # ...formatchar |
| 111 | "\s*" # whitespace |
| 112 | "(#.*)?$" # [comment] + end of string |
| 113 | ) |
| 114 | |
| 115 | # matches the special struct format chars and 'x' (pad byte) |
| 116 | _extraRE = re.compile("\s*([x@=<>!])\s*(#.*)?$") |
| 117 | |
| 118 | # matches an "empty" string, possibly containing whitespace and/or a comment |
| 119 | _emptyRE = re.compile("\s*(#.*)?$") |
| 120 | |
| 121 | _fixedpointmappings = { |
| 122 | 8: "b", |
| 123 | 16: "h", |
| 124 | 32: "l"} |
| 125 | |
| 126 | _formatcache = {} |
| 127 | |
def getformat(format):
    """Translate a format description into its struct equivalent.

    Returns a tuple (formatstring, names, fixes): the format string for the
    struct module, the list of element names in order, and a dictionary
    mapping the name of each fixed point element to its scale factor
    (a float, 2 ** number-of-fraction-bits). Results are cached per format.

    Raises 'error' when a line cannot be parsed, when a special format
    character is not the first element, or when a fixed point number is
    not 8, 16 or 32 bits wide.
    """
    try:
        return _formatcache[format]
    except KeyError:
        pass

    chars = []      # struct format characters collected so far
    names = []
    fixes = {}
    for line in re.split("[\n;]", format):
        if _emptyRE.match(line):
            continue
        special = _extraRE.match(line)
        if special:
            char = special.group(1)
            # only pad bytes may appear after the first element
            if char != 'x' and chars:
                raise error("a special format char must be first")
        else:
            element = _elementRE.match(line)
            if not element:
                raise error("syntax error in format: '%s'" % line)
            name, char, before, after, marker = element.group(1, 2, 3, 4, 5)
            names.append(name)
            if before:
                # fixed point
                fraction = int(after)
                bits = int(before) + fraction
                if bits not in [8, 16, 32]:
                    raise error("fixed point must be 8, 16 or 32 bits long")
                char = _fixedpointmappings[bits]
                assert marker == "F"
                fixes[name] = float(1 << fraction)
        chars.append(char)

    parsed = "".join(chars), names, fixes
    _formatcache[format] = parsed
    return parsed
| 164 | |
def _test():
    """Self-test: print the size, packed data and unpacked values of a sample.

    Packs the attributes of a throw-away instance with a sample format,
    then unpacks the data once into a new dictionary and once into a
    second instance, printing every intermediate result.
    """
    format = """
        # comments are allowed
        >  # big endian (see documentation for struct)
        # empty lines are allowed:

        ashort: h
        along: l
        abyte: b  # a byte
        achar: c
        astr: 5s
        afloat: f; adouble: d  # multiple "statements" are allowed
        afixed: 16.16F
    """

    print('size:', calcsize(format))

    class foo:
        pass

    i = foo()

    i.ashort = 0x7fff
    i.along = 0x7fffffff
    i.abyte = 0x7f
    # struct's 'c' and 's' codes require bytes objects on Python 3; bytes
    # literals are plain str on Python 2, so this works on both.
    i.achar = b"a"
    i.astr = b"12345"
    i.afloat = 0.5
    i.adouble = 0.5
    i.afixed = 1.5

    data = pack(format, i)
    print('data:', repr(data))
    print(unpack(format, data))
    i2 = foo()
    unpack(format, data, i2)
    print(vars(i2))
Just | 7842e56 | 1999-12-16 21:34:53 +0000 | [diff] [blame] | 202 | |
# Run the self-test when this module is executed as a script.
if __name__ == "__main__":
    _test()