blob: 6b3d13eca451d9008e735a75c19ebb648013b94e [file] [log] [blame]
Guido van Rossum105bd981997-07-11 18:39:03 +00001#! /usr/bin/env python
2
Barry Warsaw9b630a52001-06-19 19:07:46 +00003"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""
Guido van Rossume7b146f2000-02-04 15:28:42 +00004
Guido van Rossumf1945461995-06-14 23:43:44 +00005# (Dec 1991 version).
6
Barry Warsaw9b630a52001-06-19 19:07:46 +00007__all__ = ["encode", "decode", "encodestring", "decodestring"]
Skip Montanaroc62c81e2001-02-12 02:00:42 +00008
# Byte that introduces an escape sequence ("=XX"); also emitted before the
# newline of a soft line break by encode().
ESCAPE = b'='
# encode() splits any encoded line longer than this with soft line breaks.
MAXLINESIZE = 76
# Upper-case hexadecimal digits, indexed by nibble value in quote().
HEX = b'0123456789ABCDEF'
# Separator used by encode() to join the encoded pieces of one line.
EMPTYSTRING = b''
Guido van Rossumf1945461995-06-14 23:43:44 +000013
Martin v. Löwis16dc7f42001-09-30 20:32:11 +000014try:
Tim Peters527e64f2001-10-04 05:36:56 +000015 from binascii import a2b_qp, b2a_qp
Skip Montanaro3c4a6292002-03-23 05:55:18 +000016except ImportError:
Tim Peters527e64f2001-10-04 05:36:56 +000017 a2b_qp = None
18 b2a_qp = None
Barry Warsaw9b630a52001-06-19 19:07:46 +000019
Tim Petersd1c29652001-07-02 04:57:30 +000020
def needsquoting(c, quotetabs, header):
    """Tell whether the single byte 'c' (a bytes object) must be quoted.

    Spaces and tabs are quoted only when 'quotetabs' is set, and the
    underscore only when 'header' is set; in those two cases the flag itself
    is returned.  Line-ending tabs and spaces are always encoded, as per
    RFC 1521, but that is handled by the caller, not here.
    """
    assert isinstance(c, bytes)
    # Embedded whitespace: the caller's flag decides.
    if c in b' \t':
        return quotetabs
    # In header mode '_' stands for a space, so a literal '_' must be escaped.
    if c == b'_':
        return header
    # The escape byte itself can never be written literally.
    if c == ESCAPE:
        return True
    # Anything outside the printable ASCII range ' ' .. '~' is quoted.
    return c < b' ' or c > b'~'
Guido van Rossumf1945461995-06-14 23:43:44 +000035
def quote(c):
    """Return the three-byte escape ("=XX") for the single byte 'c'."""
    assert isinstance(c, bytes) and len(c)==1
    # Split the byte value into its high and low nibbles and look each one
    # up in the table of upper-case hex digits.
    high, low = divmod(ord(c), 16)
    return ESCAPE + bytes((HEX[high], HEX[low]))
Guido van Rossumf1945461995-06-14 23:43:44 +000041
Barry Warsaw9b630a52001-06-19 19:07:46 +000042
Tim Petersd1c29652001-07-02 04:57:30 +000043
def encode(input, output, quotetabs, header = 0):
    """Read 'input', apply quoted-printable encoding, and write to 'output'.

    'input' and 'output' are binary files; 'input' needs read() (used when
    binascii is available) and readline() (used by the pure-Python fallback),
    'output' needs write().
    The 'quotetabs' flag indicates whether embedded tabs and spaces should be
    quoted.  Note that line-ending tabs and spaces are always encoded, as per
    RFC 1521.
    The 'header' flag indicates whether we are encoding spaces as _ as per
    RFC 1522.

    In the pure-Python fallback, lines longer than MAXLINESIZE are split with
    soft line breaks; a break is never placed inside an "=XX" escape.
    """

    # Fast path: let the C implementation do all the work in one go.
    if b2a_qp is not None:
        data = input.read()
        odata = b2a_qp(data, quotetabs = quotetabs, header = header)
        output.write(odata)
        return

    def write(s, output=output, lineEnd=b'\n'):
        # RFC 1521 requires that the line ending in a space or tab must have
        # that trailing character encoded.
        if s and s[-1:] in b' \t':
            output.write(s[:-1] + quote(s[-1:]) + lineEnd)
        elif s == b'.':
            # A line consisting of a lone '.' is written in quoted form.
            output.write(quote(s) + lineEnd)
        else:
            output.write(s + lineEnd)

    prevline = None
    while True:
        line = input.readline()
        if not line:
            break
        outline = []
        # Strip off any readline induced trailing newline
        stripped = b''
        if line[-1:] == b'\n':
            line = line[:-1]
            stripped = b'\n'
        # Calculate the un-length-limited encoded line
        for c in line:
            # Iterating over bytes yields ints; turn each back into bytes.
            c = bytes((c,))
            if needsquoting(c, quotetabs, header):
                c = quote(c)
            if header and c == b' ':
                outline.append(b'_')
            else:
                outline.append(c)
        # First, write out the previous line
        if prevline is not None:
            write(prevline)
        # Now see if we need any soft line breaks because of RFC-imposed
        # length limitations.  Then do the thisline->prevline dance.
        thisline = EMPTYSTRING.join(outline)
        while len(thisline) > MAXLINESIZE:
            # Don't forget to include the soft line break `=' sign in the
            # length calculation!
            cut = MAXLINESIZE - 1
            # Every '=' in the encoded line starts an "=XX" escape (a literal
            # '=' is itself quoted), so if one of the last two bytes before
            # the cut is '=', the escape would straddle the soft line break
            # and no longer decode.  Back up to just before it.
            if thisline[cut-1:cut] == ESCAPE:
                cut -= 1
            elif thisline[cut-2:cut-1] == ESCAPE:
                cut -= 2
            write(thisline[:cut], lineEnd=b'=\n')
            thisline = thisline[cut:]
        # Write out the current line
        prevline = thisline
    # Write out the last line, with a trailing newline only if the input
    # line had one
    if prevline is not None:
        write(prevline, lineEnd=stripped)
Guido van Rossumf1945461995-06-14 23:43:44 +0000107
def encodestring(s, quotetabs = 0, header = 0):
    """Return the quoted-printable encoding of the bytes object 's'.

    'quotetabs' and 'header' have the same meaning as for encode().
    """
    if b2a_qp is None:
        # No C helper available: run the file-based encoder over in-memory
        # buffers.
        from io import BytesIO
        source = BytesIO(s)
        sink = BytesIO()
        encode(source, sink, quotetabs, header)
        return sink.getvalue()
    return b2a_qp(s, quotetabs = quotetabs, header = header)
116
117
Tim Petersd1c29652001-07-02 04:57:30 +0000118
def decode(input, output, header = 0):
    """Read 'input', apply quoted-printable decoding, and write to 'output'.

    'input' and 'output' are binary files; 'input' needs read() (used when
    binascii is available) and readline() (used by the pure-Python fallback),
    'output' needs write().
    If 'header' is true, decode underscore as space (per RFC 1522).

    In the fallback, malformed escape sequences are copied through
    unchanged, and "==" is decoded as a single "=".
    """

    # Fast path: let the C implementation do all the work in one go.
    if a2b_qp is not None:
        data = input.read()
        odata = a2b_qp(data, header = header)
        output.write(odata)
        return

    # Decoded pieces of the current logical line.  They are carried over to
    # the next physical line when a soft line break is seen.
    pieces = []
    while True:
        line = input.readline()
        if not line:
            break
        i, n = 0, len(line)
        if n > 0 and line[n-1:n] == b'\n':
            partial = 0
            n = n-1
            # Strip trailing whitespace
            while n > 0 and line[n-1:n] in b" \t\r":
                n = n-1
        else:
            # No newline: this is the unterminated last line of the input.
            partial = 1
        while i < n:
            # Slice rather than index so that 'c' is bytes, not an int.
            c = line[i:i+1]
            if c == b'_' and header:
                pieces.append(b' ')
                i = i+1
            elif c != ESCAPE:
                pieces.append(c)
                i = i+1
            elif i+1 == n and not partial:
                # '=' as the last character of a line: soft line break, so
                # the logical line continues on the next physical line.
                partial = 1
                break
            elif i+1 < n and line[i+1:i+2] == ESCAPE:
                # Indexing bytes yields an int, which never equals ESCAPE;
                # a one-byte slice makes the "==" case actually match.
                pieces.append(ESCAPE)
                i = i+2
            elif i+2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
                pieces.append(bytes((unhex(line[i+1:i+3]),)))
                i = i+3
            else: # Bad escape sequence -- leave it in
                pieces.append(c)
                i = i+1
        if not partial:
            output.write(b''.join(pieces) + b'\n')
            pieces = []
    # Flush whatever is left from an unterminated or soft-broken last line.
    if pieces:
        output.write(b''.join(pieces))
Guido van Rossumf1945461995-06-14 23:43:44 +0000161
def decodestring(s, header = 0):
    """Return the decoded form of the quoted-printable bytes object 's'.

    'header' has the same meaning as for decode().
    """
    if a2b_qp is None:
        # No C helper available: run the file-based decoder over in-memory
        # buffers.
        from io import BytesIO
        source = BytesIO(s)
        sink = BytesIO()
        decode(source, sink, header = header)
        return sink.getvalue()
    return a2b_qp(s, header = header)
170
171
Tim Petersd1c29652001-07-02 04:57:30 +0000172
Barry Warsaw9b630a52001-06-19 19:07:46 +0000173# Other helper functions
def ishex(c):
    """Return true if the bytes object 'c' is an ASCII hexadecimal digit.

    'c' is compared as bytes against the ranges 0-9, a-f and A-F; callers
    in this module pass a single byte.
    """
    assert isinstance(c, bytes)
    digit_ranges = ((b'0', b'9'), (b'a', b'f'), (b'A', b'F'))
    for lowest, highest in digit_ranges:
        if lowest <= c <= highest:
            return True
    return False
Guido van Rossumf1945461995-06-14 23:43:44 +0000178
def unhex(s):
    """Get the integer value of a hexadecimal number.

    's' is a bytes object of ASCII hex digits in either case; an empty
    object yields 0.  Raises AssertionError if 's' contains a byte that is
    not a hex digit.
    """
    bits = 0
    for c in s:
        # Iterating over bytes yields ints; turn each back into bytes.
        c = bytes((c,))
        if b'0' <= c <= b'9':
            i = ord('0')
        elif b'a' <= c <= b'f':
            i = ord('a')-10
        elif b'A' <= c <= b'F':
            i = ord(b'A')-10
        else:
            # Raise explicitly instead of 'assert False': assert statements
            # are removed under -O, which would let a bad digit be converted
            # with the offset left over from the previous digit.
            raise AssertionError("non-hex digit "+repr(c))
        bits = bits*16 + (ord(c) - i)
    return bits
Guido van Rossumf1945461995-06-14 23:43:44 +0000194
Barry Warsaw9b630a52001-06-19 19:07:46 +0000195
Tim Petersd1c29652001-07-02 04:57:30 +0000196
def main():
    """Command-line driver: encode (default) or decode files to stdout.

    Options: -t quotes tabs while encoding, -d decodes; the two are mutually
    exclusive.  A file name of '-' (or no file names at all) means standard
    input.  Files that cannot be opened are reported on stderr and make the
    process exit with status 1 after the remaining files are processed.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'td')
    except getopt.error as msg:
        # Send the diagnostics below to stderr.
        sys.stdout = sys.stderr
        print(msg)
        print("usage: quopri [-t | -d] [file] ...")
        print("-t: quote tabs")
        print("-d: decode; default encode")
        sys.exit(2)
    given = {flag for flag, _ in opts}
    tabs = 1 if '-t' in given else 0
    deco = 1 if '-d' in given else 0
    if tabs and deco:
        sys.stdout = sys.stderr
        print("-t and -d are mutually exclusive")
        sys.exit(2)
    args = args or ['-']
    sts = 0
    for file in args:
        from_stdin = file == '-'
        if from_stdin:
            fp = sys.stdin.buffer
        else:
            try:
                fp = open(file, "rb")
            except IOError as msg:
                sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
                sts = 1
                continue
        try:
            if deco:
                decode(fp, sys.stdout.buffer)
            else:
                encode(fp, sys.stdout.buffer, tabs)
        finally:
            # Only close what was opened here; stdin is left alone.
            if not from_stdin:
                fp.close()
    if sts:
        sys.exit(sts)
Guido van Rossumf1945461995-06-14 23:43:44 +0000240
Barry Warsaw9b630a52001-06-19 19:07:46 +0000241
Tim Petersd1c29652001-07-02 04:57:30 +0000242
Guido van Rossumf1945461995-06-14 23:43:44 +0000243if __name__ == '__main__':
Barry Warsaw9b630a52001-06-19 19:07:46 +0000244 main()