#! /usr/bin/env python3

"""Conversions to/from quoted-printable transport encoding as per RFC 1521."""

# (Dec 1991 version).

# Public API of this module; the remaining names are internal helpers.
__all__ = ["encode", "decode", "encodestring", "decodestring"]

# Byte that introduces an "=XX" hex triplet and marks a soft line break.
ESCAPE = b'='
# Longest encoded line emitted by the pure-Python encoder, excluding the
# line terminator.
MAXLINESIZE = 76
# Upper-case hex digits, indexed by nibble value when quoting a byte.
HEX = b'0123456789ABCDEF'
EMPTYSTRING = b''

# Use the binascii implementations when they can be imported; otherwise the
# names are set to None and the pure-Python code paths below are used.
try:
    from binascii import a2b_qp, b2a_qp
except ImportError:
    a2b_qp = None
    b2a_qp = None


def needsquoting(c, quotetabs, header):
    """Decide whether a particular byte needs to be quoted.

    'c' is the byte to test, passed as a bytes object (not an integer
    ordinal).  The 'quotetabs' flag indicates whether embedded tabs and
    spaces should be quoted; for a space or tab the flag itself is returned.
    The 'header' flag is returned for an underscore.  Note that line-ending
    tabs and spaces are always encoded, as per RFC 1521; that case is not
    decided here.
    """
    assert isinstance(c, bytes)
    if c in b' \t':
        return quotetabs
    # if header, we have to escape _ because _ is used to escape space
    if c == b'_':
        return header
    # The escape byte itself and anything outside ' '..'~' must be quoted.
    return c == ESCAPE or not (b' ' <= c <= b'~')

def quote(c):
    """Quote a single character.

    'c' is a bytes object of length 1.  The result is the three-byte
    sequence ESCAPE followed by the two upper-case hex digits of the byte's
    value.
    """
    assert isinstance(c, bytes) and len(c) == 1
    high, low = divmod(ord(c), 16)
    return ESCAPE + bytes((HEX[high], HEX[low]))



def encode(input, output, quotetabs, header=False):
    """Read 'input', apply quoted-printable encoding, and write to 'output'.

    'input' and 'output' are binary file objects.  The 'quotetabs' flag
    indicates whether embedded tabs and spaces should be quoted.  Note that
    line-ending tabs and spaces are always encoded, as per RFC 1521.
    The 'header' flag indicates whether we are encoding spaces as _ as per RFC
    1522.

    When binascii.b2a_qp is available the whole input is read and encoded in
    one call; otherwise the input is encoded line by line in pure Python.
    """

    if b2a_qp is not None:
        data = input.read()
        odata = b2a_qp(data, quotetabs=quotetabs, header=header)
        output.write(odata)
        return

    def write(s, output=output, lineEnd=b'\n'):
        # A line consisting of a single '.' is written in quoted form.
        if s == b'.':
            output.write(quote(s) + lineEnd)
        else:
            output.write(s + lineEnd)

    # Room must be left for the '=' of a soft line break.
    softlimit = MAXLINESIZE - 1
    prevline = None
    while True:
        line = input.readline()
        if not line:
            break
        outline = []
        # Strip off any readline induced trailing newline
        stripped = b''
        if line[-1:] == b'\n':
            line = line[:-1]
            stripped = b'\n'
        # Calculate the un-length-limited encoded line
        for c in line:
            c = bytes((c,))
            if needsquoting(c, quotetabs, header):
                c = quote(c)
            if header and c == b' ':
                outline.append(b'_')
            else:
                outline.append(c)
        # First, write out the previous line
        if prevline is not None:
            write(prevline)
        thisline = EMPTYSTRING.join(outline)
        # RFC 1521 requires that a line ending in a space or tab must have
        # that trailing character encoded.  Do it before the length check so
        # the two extra characters are counted against the line limit.
        if thisline.endswith((b' ', b'\t')):
            thisline = thisline[:-1] + quote(thisline[-1:])
        # Now see if we need any soft line breaks because of RFC-imposed
        # length limitations.  Then do the thisline->prevline dance.
        while len(thisline) > MAXLINESIZE:
            cut = softlimit
            # Every ESCAPE in the encoded line starts an "=XX" triplet
            # (a literal '=' is always quoted), so back up if the cut would
            # fall inside one; a split triplet cannot be decoded.
            for back in (1, 2):
                if cut > back and thisline[cut-back:cut-back+1] == ESCAPE:
                    cut -= back
                    break
            # Whitespace directly before a soft break is not line-ending
            # whitespace (the '=' follows it), so the chunk is written as is.
            output.write(thisline[:cut] + b'=\n')
            thisline = thisline[cut:]
        # Remember the current line; it is written on the next iteration
        prevline = thisline
    # Write out the last line, with a newline only if the input had one
    if prevline is not None:
        write(prevline, lineEnd=stripped)

def encodestring(s, quotetabs=False, header=False):
    """Return the quoted-printable encoding of the bytes object 's'.

    'quotetabs' and 'header' have the same meaning as for encode().  Uses
    binascii.b2a_qp directly when it is available; otherwise runs encode()
    over in-memory binary streams.
    """
    if b2a_qp is not None:
        return b2a_qp(s, quotetabs=quotetabs, header=header)
    from io import BytesIO
    infp = BytesIO(s)
    outfp = BytesIO()
    encode(infp, outfp, quotetabs, header)
    return outfp.getvalue()



def decode(input, output, header=False):
    """Read 'input', apply quoted-printable decoding, and write to 'output'.

    'input' and 'output' are binary file objects.
    If 'header' is true, decode underscore as space (per RFC 1522).

    When binascii.a2b_qp is available the whole input is read and decoded in
    one call; otherwise the input is decoded line by line in pure Python.
    """

    if a2b_qp is not None:
        data = input.read()
        odata = a2b_qp(data, header=header)
        output.write(odata)
        return

    # Decoded bytes of the current logical line.  A bytearray is used so
    # that appending is cheap; it carries over across soft line breaks.
    new = bytearray()
    while True:
        line = input.readline()
        if not line:
            break
        i, n = 0, len(line)
        if n > 0 and line[n-1:n] == b'\n':
            partial = False
            n = n - 1
            # Strip trailing whitespace
            while n > 0 and line[n-1:n] in b" \t\r":
                n = n - 1
        else:
            # No newline: last line of input, nothing to strip or terminate
            partial = True
        while i < n:
            c = line[i:i+1]
            if c == b'_' and header:
                new += b' '
                i = i + 1
            elif c != ESCAPE:
                new += c
                i = i + 1
            elif i + 1 == n and not partial:
                # ESCAPE at end of line: soft line break, keep accumulating
                partial = True
                break
            elif i + 1 < n and line[i+1:i+2] == ESCAPE:
                # Doubled ESCAPE decodes to a single ESCAPE
                new += ESCAPE
                i = i + 2
            elif i + 2 < n and ishex(line[i+1:i+2]) and ishex(line[i+2:i+3]):
                new.append(unhex(line[i+1:i+3]))
                i = i + 3
            else:  # Bad escape sequence -- leave it in
                new += c
                i = i + 1
        if not partial:
            new += b'\n'
            output.write(bytes(new))
            new.clear()
    if new:
        output.write(bytes(new))

def decodestring(s, header=False):
    """Return the bytes obtained by quoted-printable decoding 's'.

    'header' has the same meaning as for decode().  Uses binascii.a2b_qp
    directly when it is available; otherwise runs decode() over in-memory
    binary streams.
    """
    if a2b_qp is not None:
        return a2b_qp(s, header=header)
    from io import BytesIO
    infp = BytesIO(s)
    outfp = BytesIO()
    decode(infp, outfp, header=header)
    return outfp.getvalue()



# Other helper functions
def ishex(c):
    """Return true if 'c' is a single ASCII hexadecimal digit.

    'c' must be a bytes object.  Only a length-1 value can be a digit; the
    empty bytes object and longer values (e.g. b'0z') yield False.
    """
    assert isinstance(c, bytes)
    # The length check matters: 'in' on bytes is a subsequence test, and
    # plain range comparisons would accept multi-byte values such as b'0z'.
    return len(c) == 1 and c in b'0123456789abcdefABCDEF'

def unhex(s):
    """Get the integer value of a hexadecimal number.

    's' is a bytes object of ASCII hex digits in either case; an empty
    value gives 0.  Raises AssertionError if a byte is not a hex digit.
    """
    bits = 0
    for byte in s:
        if ord(b'0') <= byte <= ord(b'9'):
            digit = byte - ord(b'0')
        elif ord(b'a') <= byte <= ord(b'f'):
            digit = byte - ord(b'a') + 10
        elif ord(b'A') <= byte <= ord(b'F'):
            digit = byte - ord(b'A') + 10
        else:
            # Raised explicitly rather than via an 'assert' statement so the
            # check is not stripped under -O; the exception type is the one
            # an 'assert' would raise.
            raise AssertionError("non-hex digit " + repr(bytes((byte,))))
        bits = bits * 16 + digit
    return bits



def main():
    """Command-line interface: encode or decode files to standard output.

    Options: -t quotes tabs and spaces when encoding, -d decodes instead of
    encoding; the two are mutually exclusive.  Each argument is a file name,
    with '-' (the default when no arguments are given) meaning standard
    input.  Exits with status 2 on a usage error and status 1 if any file
    could not be opened.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'td')
    except getopt.error as msg:
        # Diagnostics go to stderr directly; sys.stdout is left untouched so
        # a caller that catches SystemExit does not find it redirected.
        print(msg, file=sys.stderr)
        print("usage: quopri [-t | -d] [file] ...", file=sys.stderr)
        print("-t: quote tabs", file=sys.stderr)
        print("-d: decode; default encode", file=sys.stderr)
        sys.exit(2)
    deco = False
    tabs = False
    for o, _ in opts:
        if o == '-t':
            tabs = True
        if o == '-d':
            deco = True
    if tabs and deco:
        print("-t and -d are mutually exclusive", file=sys.stderr)
        sys.exit(2)
    if not args:
        args = ['-']
    sts = 0
    for file in args:
        if file == '-':
            fp = sys.stdin.buffer
        else:
            try:
                fp = open(file, "rb")
            except OSError as msg:
                # Report the failure and carry on with the remaining files
                sys.stderr.write("%s: can't open (%s)\n" % (file, msg))
                sts = 1
                continue
        try:
            if deco:
                decode(fp, sys.stdout.buffer)
            else:
                encode(fp, sys.stdout.buffer, tabs)
        finally:
            # Only close files opened here, never standard input
            if file != '-':
                fp.close()
    if sts:
        sys.exit(sts)



# Run the command-line interface when executed as a script.
if __name__ == '__main__':
    main()