Skip to content

Commit 894bee6

Browse files
author
James William Pye
committed
Refactor encoding alias handling.
Add some mapping entries and functionally map win### and windows-### encoding names to cp###.
1 parent d54fc09 commit 894bee6

File tree

2 files changed

+45
-13
lines changed

2 files changed

+45
-13
lines changed

postgresql/encodings/aliases.py

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,57 @@
33
# http://python.projects.postgresql.org
44
##
55
"""
6-
Aliases for Python encodings that Postgres uses.
6+
Module for mapping PostgreSQL encoding names to Python encoding names.
7+
8+
These are **not** installed in Python's aliases. Rather, `get_python_name`
9+
should be used directly.
10+
11+
URLs of interest:
12+
* http://docs.python.org/library/codecs.html
13+
* http://git.postgresql.org/gitweb?p=postgresql.git;a=blob;f=src/backend/utils/mb/encnames.c
714
"""
815

9-
# dictionary of Postgres encoding names to Python encoding names
16+
##
#: Dictionary of PostgreSQL encoding names to Python codec names.
#: Only names that need translating are listed here. The win### and
#: windows-### names are not listed; `get_python_name` derives the cp###
#: name for those.
postgres_to_python = {
    'unicode': 'utf_8',
    'sql_ascii': 'ascii',
    'euc_jp': 'eucjp',
    'euc_cn': 'euccn',
    'euc_kr': 'euckr',
    # Shift_JIS-2004 and EUC-JIS-2004 are distinct byte encodings, so this
    # must not point at the EUC codec. The key is kept so existing lookups
    # on this dictionary still find an entry.
    'shift_jis_2004': 'shift_jis_2004',
    'sjis': 'shift_jis',
    'alt': 'cp866',  # IBM866
    'abc': 'cp1258',
    'vscii': 'cp1258',
    'koi8r': 'koi8_r',
    'koi8u': 'koi8_u',
    'tcvn': 'cp1258',
    'tcvn5712': 'cp1258',
    # No Python codec is mapped for these (N/A):
    # 'euc_tw': None,
    # 'mule_internal': None,
}
37+
38+
def get_python_name(encname):
    """
    Map a PostgreSQL encoding name to the Python codec name to use for it.

    The `postgres_to_python` dictionary is consulted first. If it has no
    entry, names of the form 'win###' or 'windows###' (with optional '-' or
    '_' separators before the digits) are converted to 'cp###'.

    No case folding is done here; `encname` is matched exactly as given.

    Returns `encname` unchanged when neither rule applies, including a bare
    'win' or 'windows' that carries no code page number.
    """
    # check the dictionary first
    localname = postgres_to_python.get(encname)
    if localname is not None:
        return localname
    # no explicit mapping, check for functional transformation
    if encname.startswith('win'):
        # split the name into its prefix and the trailing code page number
        bare = encname.rstrip('0123456789')
        number = encname[len(bare):]
        # Require digits: without them the result would be the bogus name 'cp'.
        if number and bare.strip('_-') in ('win', 'windows'):
            return 'cp' + number
    return encname

postgresql/protocol/typio.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -756,8 +756,8 @@ def type_from_oid(self, oid):
756756

757757
def set_encoding(self, value):
758758
self.encoding = value.lower().strip()
759-
enc = pg_enc_aliases.postgres_to_python.get(self.encoding, value)
760-
ci = codecs.lookup(enc)
759+
enc = pg_enc_aliases.get_python_name(self.encoding)
760+
ci = codecs.lookup(enc or self.encoding)
761761
self._encode = ci[0]
762762
self._decode = ci[1]
763763

0 commit comments

Comments
 (0)