# Source blob: f362bae10d3b35778d4fb66b98a194d297425453
""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python-specific codec
        aliases have also been added.

"""
# Mapping of normalized encoding alias -> codec module name.
#
# Keys are the alias spellings as they look *after* the encodings package
# search function has normalized them, so they are written in lower case
# with underscores.  Values are the names of the codec modules the aliases
# resolve to.  Entries are grouped by target codec; each group is introduced
# by a "# <module> codec" comment.
aliases = {

    # ascii codec
    '646': 'ascii',
    'ansi_x3.4_1968': 'ascii',
    'ansi_x3_4_1968': 'ascii',  # some email headers use this non-standard name
    'ansi_x3.4_1986': 'ascii',
    'cp367': 'ascii',
    'csascii': 'ascii',
    'ibm367': 'ascii',
    'iso646_us': 'ascii',
    'iso_646.irv_1991': 'ascii',
    'iso_ir_6': 'ascii',
    'us': 'ascii',
    'us_ascii': 'ascii',

    # base64_codec codec
    'base64': 'base64_codec',
    'base_64': 'base64_codec',

    # big5 codec
    'big5_tw': 'big5',
    'csbig5': 'big5',

    # bz2_codec codec
    'bz2': 'bz2_codec',

    # cp037 codec
    '037': 'cp037',
    'csibm037': 'cp037',
    'ebcdic_cp_ca': 'cp037',
    'ebcdic_cp_nl': 'cp037',
    'ebcdic_cp_us': 'cp037',
    'ebcdic_cp_wt': 'cp037',
    'ibm037': 'cp037',
    'ibm039': 'cp037',

    # cp1026 codec
    '1026': 'cp1026',
    'csibm1026': 'cp1026',
    'ibm1026': 'cp1026',

    # cp1140 codec
    '1140': 'cp1140',
    'ibm1140': 'cp1140',

    # cp1250 codec
    '1250': 'cp1250',
    'windows_1250': 'cp1250',

    # cp1251 codec
    '1251': 'cp1251',
    'windows_1251': 'cp1251',

    # cp1252 codec
    '1252': 'cp1252',
    'windows_1252': 'cp1252',

    # cp1253 codec
    '1253': 'cp1253',
    'windows_1253': 'cp1253',

    # cp1254 codec
    '1254': 'cp1254',
    'windows_1254': 'cp1254',

    # cp1255 codec
    '1255': 'cp1255',
    'windows_1255': 'cp1255',

    # cp1256 codec
    '1256': 'cp1256',
    'windows_1256': 'cp1256',

    # cp1257 codec
    '1257': 'cp1257',
    'windows_1257': 'cp1257',

    # cp1258 codec
    '1258': 'cp1258',
    'windows_1258': 'cp1258',

    # cp424 codec
    '424': 'cp424',
    'csibm424': 'cp424',
    'ebcdic_cp_he': 'cp424',
    'ibm424': 'cp424',

    # cp437 codec
    '437': 'cp437',
    'cspc8codepage437': 'cp437',
    'ibm437': 'cp437',

    # cp500 codec
    '500': 'cp500',
    'csibm500': 'cp500',
    'ebcdic_cp_be': 'cp500',
    'ebcdic_cp_ch': 'cp500',
    'ibm500': 'cp500',

    # cp775 codec
    '775': 'cp775',
    'cspc775baltic': 'cp775',
    'ibm775': 'cp775',

    # cp850 codec
    '850': 'cp850',
    'cspc850multilingual': 'cp850',
    'ibm850': 'cp850',

    # cp852 codec
    '852': 'cp852',
    'cspcp852': 'cp852',
    'ibm852': 'cp852',

    # cp855 codec
    '855': 'cp855',
    'csibm855': 'cp855',
    'ibm855': 'cp855',

    # cp857 codec
    '857': 'cp857',
    'csibm857': 'cp857',
    'ibm857': 'cp857',

    # cp860 codec
    '860': 'cp860',
    'csibm860': 'cp860',
    'ibm860': 'cp860',

    # cp861 codec
    '861': 'cp861',
    'cp_is': 'cp861',
    'csibm861': 'cp861',
    'ibm861': 'cp861',

    # cp862 codec
    '862': 'cp862',
    'cspc862latinhebrew': 'cp862',
    'ibm862': 'cp862',

    # cp863 codec
    '863': 'cp863',
    'csibm863': 'cp863',
    'ibm863': 'cp863',

    # cp864 codec
    '864': 'cp864',
    'csibm864': 'cp864',
    'ibm864': 'cp864',

    # cp865 codec
    '865': 'cp865',
    'csibm865': 'cp865',
    'ibm865': 'cp865',

    # cp866 codec
    '866': 'cp866',
    'csibm866': 'cp866',
    'ibm866': 'cp866',

    # cp869 codec
    '869': 'cp869',
    'cp_gr': 'cp869',
    'csibm869': 'cp869',
    'ibm869': 'cp869',

    # cp932 codec
    '932': 'cp932',
    'ms932': 'cp932',
    'mskanji': 'cp932',
    'ms_kanji': 'cp932',

    # cp949 codec
    '949': 'cp949',
    'ms949': 'cp949',
    'uhc': 'cp949',

    # cp950 codec
    '950': 'cp950',
    'ms950': 'cp950',

    # euc_jisx0213 codec
    'jisx0213': 'euc_jisx0213',
    'eucjisx0213': 'euc_jisx0213',

    # euc_jp codec
    'eucjp': 'euc_jp',
    'ujis': 'euc_jp',
    'u_jis': 'euc_jp',

    # euc_kr codec
    'euckr': 'euc_kr',
    'korean': 'euc_kr',
    'ksc5601': 'euc_kr',
    'ks_c_5601': 'euc_kr',
    'ks_c_5601_1987': 'euc_kr',
    'ksx1001': 'euc_kr',
    'ks_x_1001': 'euc_kr',

    # gb18030 codec
    'gb18030_2000': 'gb18030',

    # gb2312 codec
    'chinese': 'gb2312',
    'csiso58gb231280': 'gb2312',
    'euc_cn': 'gb2312',
    'euccn': 'gb2312',
    'eucgb2312_cn': 'gb2312',
    'gb2312_1980': 'gb2312',
    'gb2312_80': 'gb2312',
    'iso_ir_58': 'gb2312',

    # gbk codec
    '936': 'gbk',
    'cp936': 'gbk',
    'ms936': 'gbk',

    # hex_codec codec
    'hex': 'hex_codec',

    # hz codec
    'hzgb': 'hz',
    'hz_gb': 'hz',
    'hz_gb_2312': 'hz',

    # iso2022_jp codec
    'csiso2022jp': 'iso2022_jp',
    'iso2022jp': 'iso2022_jp',
    'iso_2022_jp': 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1': 'iso2022_jp_1',
    'iso_2022_jp_1': 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2': 'iso2022_jp_2',
    'iso_2022_jp_2': 'iso2022_jp_2',

    # iso2022_jp_3 codec
    'iso2022jp_3': 'iso2022_jp_3',
    'iso_2022_jp_3': 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext': 'iso2022_jp_ext',
    'iso_2022_jp_ext': 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr': 'iso2022_kr',
    'iso2022kr': 'iso2022_kr',
    'iso_2022_kr': 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6': 'iso8859_10',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_10_1992': 'iso8859_10',
    'iso_ir_157': 'iso8859_10',
    'l6': 'iso8859_10',
    'latin6': 'iso8859_10',

    # iso8859_13 codec
    'iso_8859_13': 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14': 'iso8859_14',
    'iso_8859_14_1998': 'iso8859_14',
    'iso_celtic': 'iso8859_14',
    'iso_ir_199': 'iso8859_14',
    'l8': 'iso8859_14',
    'latin8': 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15': 'iso8859_15',

    # iso8859_2 codec
    'csisolatin2': 'iso8859_2',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_2_1987': 'iso8859_2',
    'iso_ir_101': 'iso8859_2',
    'l2': 'iso8859_2',
    'latin2': 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3': 'iso8859_3',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_3_1988': 'iso8859_3',
    'iso_ir_109': 'iso8859_3',
    'l3': 'iso8859_3',
    'latin3': 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4': 'iso8859_4',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_4_1988': 'iso8859_4',
    'iso_ir_110': 'iso8859_4',
    'l4': 'iso8859_4',
    'latin4': 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic': 'iso8859_5',
    'cyrillic': 'iso8859_5',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_5_1988': 'iso8859_5',
    'iso_ir_144': 'iso8859_5',

    # iso8859_6 codec
    'arabic': 'iso8859_6',
    'asmo_708': 'iso8859_6',
    'csisolatinarabic': 'iso8859_6',
    'ecma_114': 'iso8859_6',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_6_1987': 'iso8859_6',
    'iso_ir_127': 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek': 'iso8859_7',
    'ecma_118': 'iso8859_7',
    'elot_928': 'iso8859_7',
    'greek': 'iso8859_7',
    'greek8': 'iso8859_7',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_7_1987': 'iso8859_7',
    'iso_ir_126': 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew': 'iso8859_8',
    'hebrew': 'iso8859_8',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_8_1988': 'iso8859_8',
    'iso_ir_138': 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5': 'iso8859_9',
    'iso_8859_9': 'iso8859_9',
    'iso_8859_9_1989': 'iso8859_9',
    'iso_ir_148': 'iso8859_9',
    'l5': 'iso8859_9',
    'latin5': 'iso8859_9',

    # johab codec
    'cp1361': 'johab',
    'ms1361': 'johab',

    # koi8_r codec
    'cskoi8r': 'koi8_r',

    # latin_1 codec
    '8859': 'latin_1',
    'cp819': 'latin_1',
    'csisolatin1': 'latin_1',
    'ibm819': 'latin_1',
    'iso8859': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_1_1987': 'latin_1',
    'iso_ir_100': 'latin_1',
    'l1': 'latin_1',
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # mac_cyrillic codec
    'maccyrillic': 'mac_cyrillic',

    # mac_greek codec
    'macgreek': 'mac_greek',

    # mac_iceland codec
    'maciceland': 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope': 'mac_latin2',
    'maclatin2': 'mac_latin2',

    # mac_roman codec
    'macroman': 'mac_roman',

    # mac_turkish codec
    'macturkish': 'mac_turkish',

    # mbcs codec
    'dbcs': 'mbcs',

    # quopri_codec codec
    'quopri': 'quopri_codec',
    'quoted_printable': 'quopri_codec',
    'quotedprintable': 'quopri_codec',

    # rot_13 codec
    'rot13': 'rot_13',

    # shift_jis codec
    'csshiftjis': 'shift_jis',
    'shiftjis': 'shift_jis',
    'sjis': 'shift_jis',
    's_jis': 'shift_jis',

    # shift_jisx0213 codec
    'shiftjisx0213': 'shift_jisx0213',
    'sjisx0213': 'shift_jisx0213',
    's_jisx0213': 'shift_jisx0213',

    # tactis codec
    # NOTE(review): confirm that a 'tactis' codec module actually ships with
    # the encodings package; the mapping is kept unchanged here.
    'tis260': 'tactis',

    # utf_16 codec
    'u16': 'utf_16',
    'utf16': 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked': 'utf_16_be',
    'utf_16be': 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked': 'utf_16_le',
    'utf_16le': 'utf_16_le',

    # utf_7 codec
    'u7': 'utf_7',
    'utf7': 'utf_7',

    # utf_8 codec
    'u8': 'utf_8',
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'utf8_ucs2': 'utf_8',
    'utf8_ucs4': 'utf_8',

    # uu_codec codec
    'uu': 'uu_codec',

    # zlib_codec codec
    'zip': 'zlib_codec',
    'zlib': 'zlib_codec',

}