blob: 0594171c09f41b255916463aac01e45d1ae8a88a [file] [log] [blame]
""" Encoding Aliases Support

    This module is used by the encodings package search function to
    map encoding names to module names.

    Note that the search function normalizes the encoding names before
    doing the lookup, so the mapping will have to map normalized
    encoding names to module names.

    Contents:

        The following aliases dictionary contains mappings of all IANA
        character set names for which the Python core library provides
        codecs. In addition to these, a few Python specific codec
        aliases have also been added.

"""
# Mapping of alias name -> codec module name.
#
# Keys are alias spellings in the normalized form the lookup uses (all
# keys below are lowercase and contain no hyphens or spaces); values are
# the names of the codec modules the aliases resolve to.  Entries are
# grouped by target codec, one "# <module> codec" heading per group.
aliases = {

    # ascii codec
    '646'                : 'ascii',
    'ansi_x3.4_1968'     : 'ascii',
    'ansi_x3_4_1968'     : 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986'     : 'ascii',
    'cp367'              : 'ascii',
    'csascii'            : 'ascii',
    'ibm367'             : 'ascii',
    'iso646_us'          : 'ascii',
    'iso_646.irv_1991'   : 'ascii',
    'iso_ir_6'           : 'ascii',
    'us'                 : 'ascii',
    'us_ascii'           : 'ascii',

    # base64_codec codec
    'base64'             : 'base64_codec',
    'base_64'            : 'base64_codec',

    # big5 codec
    'big5_tw'            : 'big5',
    'csbig5'             : 'big5',

    # bz2_codec codec
    'bz2'                : 'bz2_codec',

    # cp037 codec
    'csibm037'           : 'cp037',
    'ebcdic_cp_ca'       : 'cp037',
    'ebcdic_cp_nl'       : 'cp037',
    'ebcdic_cp_us'       : 'cp037',
    'ebcdic_cp_wt'       : 'cp037',
    'ibm037'             : 'cp037',
    'ibm039'             : 'cp037',

    # cp1026 codec
    'csibm1026'          : 'cp1026',
    'ibm1026'            : 'cp1026',

    # cp1140 codec
    'ibm1140'            : 'cp1140',

    # cp1250 codec
    'windows_1250'       : 'cp1250',

    # cp1251 codec
    'windows_1251'       : 'cp1251',

    # cp1252 codec
    'windows_1252'       : 'cp1252',

    # cp1253 codec
    'windows_1253'       : 'cp1253',

    # cp1254 codec
    'windows_1254'       : 'cp1254',

    # cp1255 codec
    'windows_1255'       : 'cp1255',

    # cp1256 codec
    'windows_1256'       : 'cp1256',

    # cp1257 codec
    'windows_1257'       : 'cp1257',

    # cp1258 codec
    'windows_1258'       : 'cp1258',

    # cp424 codec
    'csibm424'           : 'cp424',
    'ebcdic_cp_he'       : 'cp424',
    'ibm424'             : 'cp424',

    # cp437 codec
    '437'                : 'cp437',
    'cspc8codepage437'   : 'cp437',
    'ibm437'             : 'cp437',

    # cp500 codec
    'csibm500'           : 'cp500',
    'ebcdic_cp_be'       : 'cp500',
    'ebcdic_cp_ch'       : 'cp500',
    'ibm500'             : 'cp500',

    # cp775 codec
    'cspc775baltic'      : 'cp775',
    'ibm775'             : 'cp775',

    # cp850 codec
    '850'                : 'cp850',
    'cspc850multilingual' : 'cp850',
    'ibm850'             : 'cp850',

    # cp852 codec
    '852'                : 'cp852',
    'cspcp852'           : 'cp852',
    'ibm852'             : 'cp852',

    # cp855 codec
    '855'                : 'cp855',
    'csibm855'           : 'cp855',
    'ibm855'             : 'cp855',

    # cp857 codec
    '857'                : 'cp857',
    'csibm857'           : 'cp857',
    'ibm857'             : 'cp857',

    # cp860 codec
    '860'                : 'cp860',
    'csibm860'           : 'cp860',
    'ibm860'             : 'cp860',

    # cp861 codec
    '861'                : 'cp861',
    'cp_is'              : 'cp861',
    'csibm861'           : 'cp861',
    'ibm861'             : 'cp861',

    # cp862 codec
    '862'                : 'cp862',
    'cspc862latinhebrew' : 'cp862',
    'ibm862'             : 'cp862',

    # cp863 codec
    '863'                : 'cp863',
    'csibm863'           : 'cp863',
    'ibm863'             : 'cp863',

    # cp864 codec
    'csibm864'           : 'cp864',
    'ibm864'             : 'cp864',

    # cp865 codec
    '865'                : 'cp865',
    'csibm865'           : 'cp865',
    'ibm865'             : 'cp865',

    # cp866 codec
    '866'                : 'cp866',
    'csibm866'           : 'cp866',
    'ibm866'             : 'cp866',

    # cp869 codec
    '869'                : 'cp869',
    'cp_gr'              : 'cp869',
    'csibm869'           : 'cp869',
    'ibm869'             : 'cp869',

    # cp932 codec
    '932'                : 'cp932',
    'ms932'              : 'cp932',
    'mskanji'            : 'cp932',
    'ms_kanji'           : 'cp932',

    # cp949 codec
    '949'                : 'cp949',
    'ms949'              : 'cp949',
    'uhc'                : 'cp949',

    # cp950 codec
    '950'                : 'cp950',
    'ms950'              : 'cp950',

    # euc_jisx0213 codec
    'jisx0213'           : 'euc_jisx0213',
    'eucjisx0213'        : 'euc_jisx0213',

    # euc_jp codec
    'eucjp'              : 'euc_jp',
    'ujis'               : 'euc_jp',
    'u_jis'              : 'euc_jp',

    # euc_kr codec
    'euckr'              : 'euc_kr',
    'korean'             : 'euc_kr',
    'ksc5601'            : 'euc_kr',
    'ks_c_5601'          : 'euc_kr',
    'ks_c_5601_1987'     : 'euc_kr',
    'ksx1001'            : 'euc_kr',
    'ks_x_1001'          : 'euc_kr',

    # gb18030 codec
    'gb18030_2000'       : 'gb18030',

    # gb2312 codec
    'chinese'            : 'gb2312',
    'csiso58gb231280'    : 'gb2312',
    'euc_cn'             : 'gb2312',
    'euccn'              : 'gb2312',
    'eucgb2312_cn'       : 'gb2312',
    'gb2312_1980'        : 'gb2312',
    'gb2312_80'          : 'gb2312',
    'iso_ir_58'          : 'gb2312',

    # gbk codec
    '936'                : 'gbk',
    'cp936'              : 'gbk',
    'ms936'              : 'gbk',

    # hex_codec codec
    'hex'                : 'hex_codec',

    # hz codec
    'hzgb'               : 'hz',
    'hz_gb'              : 'hz',
    'hz_gb_2312'         : 'hz',

    # iso2022_jp codec
    'csiso2022jp'        : 'iso2022_jp',
    'iso2022jp'          : 'iso2022_jp',
    'iso_2022_jp'        : 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1'        : 'iso2022_jp_1',
    'iso_2022_jp_1'      : 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2'        : 'iso2022_jp_2',
    'iso_2022_jp_2'      : 'iso2022_jp_2',

    # iso2022_jp_3 codec
    'iso2022jp_3'        : 'iso2022_jp_3',
    'iso_2022_jp_3'      : 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext'      : 'iso2022_jp_ext',
    'iso_2022_jp_ext'    : 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr'        : 'iso2022_kr',
    'iso2022kr'          : 'iso2022_kr',
    'iso_2022_kr'        : 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6'        : 'iso8859_10',
    'iso_8859_10'        : 'iso8859_10',
    'iso_8859_10_1992'   : 'iso8859_10',
    'iso_ir_157'         : 'iso8859_10',
    'l6'                 : 'iso8859_10',
    'latin6'             : 'iso8859_10',

    # iso8859_13 codec
    'iso_8859_13'        : 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14'        : 'iso8859_14',
    'iso_8859_14_1998'   : 'iso8859_14',
    'iso_celtic'         : 'iso8859_14',
    'iso_ir_199'         : 'iso8859_14',
    'l8'                 : 'iso8859_14',
    'latin8'             : 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15'        : 'iso8859_15',

    # iso8859_2 codec
    'csisolatin2'        : 'iso8859_2',
    'iso_8859_2'         : 'iso8859_2',
    'iso_8859_2_1987'    : 'iso8859_2',
    'iso_ir_101'         : 'iso8859_2',
    'l2'                 : 'iso8859_2',
    'latin2'             : 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3'        : 'iso8859_3',
    'iso_8859_3'         : 'iso8859_3',
    'iso_8859_3_1988'    : 'iso8859_3',
    'iso_ir_109'         : 'iso8859_3',
    'l3'                 : 'iso8859_3',
    'latin3'             : 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4'        : 'iso8859_4',
    'iso_8859_4'         : 'iso8859_4',
    'iso_8859_4_1988'    : 'iso8859_4',
    'iso_ir_110'         : 'iso8859_4',
    'l4'                 : 'iso8859_4',
    'latin4'             : 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic' : 'iso8859_5',
    'cyrillic'           : 'iso8859_5',
    'iso_8859_5'         : 'iso8859_5',
    'iso_8859_5_1988'    : 'iso8859_5',
    'iso_ir_144'         : 'iso8859_5',

    # iso8859_6 codec
    'arabic'             : 'iso8859_6',
    'asmo_708'           : 'iso8859_6',
    'csisolatinarabic'   : 'iso8859_6',
    'ecma_114'           : 'iso8859_6',
    'iso_8859_6'         : 'iso8859_6',
    'iso_8859_6_1987'    : 'iso8859_6',
    'iso_ir_127'         : 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek'    : 'iso8859_7',
    'ecma_118'           : 'iso8859_7',
    'elot_928'           : 'iso8859_7',
    'greek'              : 'iso8859_7',
    'greek8'             : 'iso8859_7',
    'iso_8859_7'         : 'iso8859_7',
    'iso_8859_7_1987'    : 'iso8859_7',
    'iso_ir_126'         : 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew'   : 'iso8859_8',
    'hebrew'             : 'iso8859_8',
    'iso_8859_8'         : 'iso8859_8',
    'iso_8859_8_1988'    : 'iso8859_8',
    'iso_ir_138'         : 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5'        : 'iso8859_9',
    'iso_8859_9'         : 'iso8859_9',
    'iso_8859_9_1989'    : 'iso8859_9',
    'iso_ir_148'         : 'iso8859_9',
    'l5'                 : 'iso8859_9',
    'latin5'             : 'iso8859_9',

    # johab codec
    'cp1361'             : 'johab',
    'ms1361'             : 'johab',

    # koi8_r codec
    'cskoi8r'            : 'koi8_r',

    # latin_1 codec
    '8859'               : 'latin_1',
    'cp819'              : 'latin_1',
    'csisolatin1'        : 'latin_1',
    'ibm819'             : 'latin_1',
    'iso8859'            : 'latin_1',
    'iso_8859_1'         : 'latin_1',
    'iso_8859_1_1987'    : 'latin_1',
    'iso_ir_100'         : 'latin_1',
    'l1'                 : 'latin_1',
    'latin'              : 'latin_1',
    'latin1'             : 'latin_1',

    # mac_cyrillic codec
    'maccyrillic'        : 'mac_cyrillic',

    # mac_greek codec
    'macgreek'           : 'mac_greek',

    # mac_iceland codec
    'maciceland'         : 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope'   : 'mac_latin2',
    'maclatin2'          : 'mac_latin2',

    # mac_roman codec
    'macroman'           : 'mac_roman',

    # mac_turkish codec
    'macturkish'         : 'mac_turkish',

    # mbcs codec
    'dbcs'               : 'mbcs',

    # quopri_codec codec
    'quopri'             : 'quopri_codec',
    'quoted_printable'   : 'quopri_codec',
    'quotedprintable'    : 'quopri_codec',

    # rot_13 codec
    'rot13'              : 'rot_13',

    # shift_jis codec
    'csshiftjis'         : 'shift_jis',
    'shiftjis'           : 'shift_jis',
    'sjis'               : 'shift_jis',
    's_jis'              : 'shift_jis',

    # shift_jisx0213 codec
    'shiftjisx0213'      : 'shift_jisx0213',
    'sjisx0213'          : 'shift_jisx0213',
    's_jisx0213'         : 'shift_jisx0213',

    # tactis codec
    # NOTE(review): no 'tactis' codec module is visible from this file --
    # confirm the target module exists in the encodings package.
    'tis260'             : 'tactis',

    # utf_16 codec
    'u16'                : 'utf_16',
    'utf16'              : 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked' : 'utf_16_be',
    'utf_16be'           : 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked' : 'utf_16_le',
    'utf_16le'           : 'utf_16_le',

    # utf_7 codec
    'u7'                 : 'utf_7',
    'utf7'               : 'utf_7',

    # utf_8 codec
    'u8'                 : 'utf_8',
    'utf'                : 'utf_8',
    'utf8'               : 'utf_8',
    'utf8_ucs2'          : 'utf_8',
    'utf8_ucs4'          : 'utf_8',

    # uu_codec codec
    'uu'                 : 'uu_codec',

    # zlib_codec codec
    'zip'                : 'zlib_codec',
    'zlib'               : 'zlib_codec',

}