|
3 | 3 | # http://python.projects.postgresql.org |
4 | 4 | ## |
5 | 5 | """ |
6 | | -Aliases for Python encodings that Postgres uses. |
| 6 | +Module for mapping PostgreSQL encoding names to Python encoding names. |
| 7 | +
|
| 8 | +These are **not** installed in Python's aliases. Rather, `get_python_name` |
| 9 | +should be used directly. |
| 10 | +
|
| 11 | +URLs of interest: |
| 12 | + * http://docs.python.org/library/codecs.html |
| 13 | + * http://git.postgresql.org/gitweb?p=postgresql.git;a=blob;f=src/backend/utils/mb/encnames.c |
7 | 14 | """ |
8 | 15 |
|
##
#: Dictionary of Postgres encoding names to Python encoding names.
#: Only names that need an explicit translation are listed; the
#: win####/windows-#### family is not listed here because `get_python_name`
#: derives the matching 'cp####' name functionally.
postgres_to_python = {
    'unicode': 'utf_8',
    'sql_ascii': 'ascii',
    'euc_jp': 'eucjp',
    'euc_cn': 'euccn',
    'euc_kr': 'euckr',
    # Shift_JIS-2004 and EUC-JIS-2004 are different byte encodings, so the
    # Postgres name must resolve to Python's codec of the same name. The key
    # is kept (as an identity entry) for callers that index this dictionary.
    'shift_jis_2004': 'shift_jis_2004',
    'sjis': 'shift_jis',
    'alt': 'cp866',  # IBM866
    'abc': 'cp1258',
    'vscii': 'cp1258',
    'koi8r': 'koi8_r',
    'koi8u': 'koi8_u',
    'tcvn': 'cp1258',
    'tcvn5712': 'cp1258',
    # 'euc_tw' and 'mule_internal' are deliberately left unmapped: they were
    # marked N/A (no Python counterpart) when this table was written.
}
| 37 | + |
def get_python_name(encname):
    """
    Translate a PostgreSQL encoding name into the name to hand to Python.

    The name is first looked up in the `postgres_to_python` dictionary. If
    there is no explicit entry, names of the form ``win####`` or
    ``windows-####`` (with optional ``_``/``-`` separators before the number)
    are converted to the corresponding ``cp####`` name.

    `encname` is matched exactly as given; no case folding is performed.

    Returns the translated name, or `encname` unchanged when neither the
    dictionary nor the win/windows rule applies.
    """
    # Explicit aliases take precedence over the functional rule.
    localname = postgres_to_python.get(encname)
    if localname is not None:
        return localname
    # No explicit mapping; check for the win####/windows-#### family.
    if encname.startswith('win'):
        # Split the name into its prefix and its trailing code page number.
        bare = encname.rstrip('0123456789')
        digits = encname[len(bare):]
        # Require an actual number: a bare 'win'/'windows' would otherwise
        # be rewritten to just 'cp', which is not a codec name.
        if digits and bare.strip('_-') in ('win', 'windows'):
            return 'cp' + digits
    return encname
0 commit comments