From 1a717574c757f03819a26635fb60a6bfa4cf65f7 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 22 Mar 2026 14:25:34 -0700 Subject: [PATCH 1/3] Add tests for rejecting non-zero padding bits in base64/base32 RFC 4648 section 3.5 allows decoders to reject encoded data containing non-zero pad bits. Both a2b_base64 (strict_mode=True) and a2b_base32 currently silently discard non-zero trailing bits instead of raising binascii.Error. These tests document the expected behavior. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 70 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 1dcd2b25c79087..47bc4872462558 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -334,6 +334,34 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) + def test_base64_nonzero_padding_bits(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. 
+ + # 2 data chars + "==": last char has 4 padding bits + # 'A' = 0, 'B' = 1 -> 000000 000001 -> byte 0x00, leftover 0001 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AB=='), strict_mode=True) + # 'A' = 0, 'P' = 15 -> 000000 001111 -> byte 0x00, leftover 1111 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AP=='), strict_mode=True) + + # 3 data chars + "=": last char has 2 padding bits + # 'A' = 0, 'A' = 0, 'B' = 1 -> 000000 000000 000001 -> bytes 0x00 0x00, + # leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAB='), strict_mode=True) + # 'A' = 0, 'A' = 0, 'D' = 3 -> leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAD='), strict_mode=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base64(self.type2test(b'AA=='), strict_mode=True) + binascii.a2b_base64(self.type2test(b'AAA='), strict_mode=True) + + # Full quads with no padding have no leftover bits -- always valid + binascii.a2b_base64(self.type2test(b'AAAA'), strict_mode=True) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -824,6 +852,48 @@ def assertInvalidLength(*args): assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + def test_base32_nonzero_padding_bits(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. 
+ + # 2 data chars + "======": last char has 2 padding bits + # 'AB' -> 00000 00001 -> byte 0x00, leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AB======')) + # 'AD' -> 00000 00011 -> byte 0x00, leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AD======')) + + # 4 data chars + "====": last char has 4 padding bits + # 'AAAB' -> 00000 00000 00000 00001 -> bytes 0x00 0x00, leftover 0001 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAB====')) + # 'AAAP' -> leftover 1111 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAP====')) + + # 5 data chars + "===": last char has 1 padding bit + # 'AAAAB' -> 4*00000 + 00001 -> bytes 0x00*3, leftover 1 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAB===')) + + # 7 data chars + "=": last char has 3 padding bits + # 'AAAAAAB' -> 6*00000 + 00001 -> bytes 0x00*4, leftover 001 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAB=')) + # 'AAAAAAH' -> leftover 111 + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAH=')) + + # Verify that zero padding bits are accepted + binascii.a2b_base32(self.type2test(b'AA======')) + binascii.a2b_base32(self.type2test(b'AAAA====')) + binascii.a2b_base32(self.type2test(b'AAAAA===')) + binascii.a2b_base32(self.type2test(b'AAAAAAA=')) + + # Full octet with no padding -- always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA')) + def test_base32_alphabet(self): alphabet = b'0Aa1Bb2Cc3Dd4Ee5Ff6Gg7Hh8Ii9JjKk' data = self.type2test(self.rawdata) From 0ca2563a96284ace9bef9c1f979799117cd7ad07 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sun, 22 Mar 2026 14:44:56 -0700 Subject: [PATCH 2/3] Reject non-zero padding bits in base64/base32 decoding Add leftchar validation after the main decode loop in a2b_base64 (strict_mode only) and a2b_base32 (always). Fix existing test data that incidentally had non-zero padding bits to use characters with zero trailing bits while preserving the same decoded output. Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_binascii.py | 58 +++++++++++++++++++-------------------- Modules/binascii.c | 20 ++++++++++++++ 2 files changed, 49 insertions(+), 29 deletions(-) diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 47bc4872462558..40a40f88007aaf 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -183,8 +183,8 @@ def assertExcessPadding(*args): def assertInvalidLength(*args): _assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args) - assertExcessPadding(b'ab===', b'i') - assertExcessPadding(b'ab====', b'i') + assertExcessPadding(b'aQ===', b'i') + assertExcessPadding(b'aQ====', b'i') assertExcessPadding(b'abc==', b'i\xb7') assertExcessPadding(b'abc===', b'i\xb7') assertExcessPadding(b'abc====', b'i\xb7') @@ -201,7 +201,7 @@ def assertInvalidLength(*args): assertLeadingPadding(b'====abcd', b'i\xb7\x1d') assertLeadingPadding(b'=====abcd', b'i\xb7\x1d') - assertInvalidLength(b'a=b==', b'i') + assertInvalidLength(b'a=Q==', b'i') assertInvalidLength(b'a=bc=', b'i\xb7') assertInvalidLength(b'a=bc==', b'i\xb7') assertInvalidLength(b'a=bcd', b'i\xb7\x1d') @@ -241,17 +241,17 @@ def assertNonBase64Data(data, expected, ignorechars): self.assertEqual(binascii.a2b_base64(data, strict_mode=False, ignorechars=b''), expected) - assertNonBase64Data(b'\nab==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') - assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00') - assertNonBase64Data(b'ab:==', 
b'i', ignorechars=b':') - assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') - assertNonBase64Data(b'ab==:', b'i', ignorechars=b':') + assertNonBase64Data(b'\naQ==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'aQ:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\x00Q==', b'i', ignorechars=b'\x00') + assertNonBase64Data(b'aQ:==', b'i', ignorechars=b':') + assertNonBase64Data(b'aQ=:=', b'i', ignorechars=b':') + assertNonBase64Data(b'aQ==:', b'i', ignorechars=b':') assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':') - assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n') - assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) - assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) + assertNonBase64Data(b'aQ==\n', b'i', ignorechars=b'\n') + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=bytearray(b'\n')) + assertNonBase64Data(b'a\nQ==', b'i', ignorechars=memoryview(b'\n')) # Same cell in the cache: '\r' >> 3 == '\n' >> 3. 
data = self.type2test(b'\r\n') @@ -766,19 +766,19 @@ def assertInvalidLength(*args): assertExcessData(b"ABCDEFG=H") assertExcessData(b"432Z====55555555") - assertExcessData(b"BE======EF", b"\t\x08") + assertExcessData(b"BE======EA", b"\t\x08") assertExcessData(b"BEEF====C", b"\t\x08Q") - assertExcessData(b"BEEFC===AK", b"\t\x08Q\x01") + assertExcessData(b"BEEFC===AI", b"\t\x08Q\x01") assertExcessData(b"BEEFCAK=E", b"\t\x08Q\x01D") assertExcessPadding(b"BE=======", b"\t") assertExcessPadding(b"BE========", b"\t") - assertExcessPadding(b"BEEF=====", b"\t\x08") - assertExcessPadding(b"BEEF======", b"\t\x08") + assertExcessPadding(b"BEEA=====", b"\t\x08") + assertExcessPadding(b"BEEA======", b"\t\x08") assertExcessPadding(b"BEEFC====", b"\t\x08Q") assertExcessPadding(b"BEEFC=====", b"\t\x08Q") - assertExcessPadding(b"BEEFCAK==", b"\t\x08Q\x01") - assertExcessPadding(b"BEEFCAK===", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI==", b"\t\x08Q\x01") + assertExcessPadding(b"BEEFCAI===", b"\t\x08Q\x01") assertExcessPadding(b"BEEFCAKE=", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE==", b"\t\x08Q\x01D") assertExcessPadding(b"BEEFCAKE===", b"\t\x08Q\x01D") @@ -818,16 +818,16 @@ def assertInvalidLength(*args): assertIncorrectPadding(b"BE===", b"\t") assertIncorrectPadding(b"BE====", b"\t") assertIncorrectPadding(b"BE=====", b"\t") - assertIncorrectPadding(b"BEEF=", b"\t\x08") - assertIncorrectPadding(b"BEEF==", b"\t\x08") - assertIncorrectPadding(b"BEEF===", b"\t\x08") + assertIncorrectPadding(b"BEEA=", b"\t\x08") + assertIncorrectPadding(b"BEEA==", b"\t\x08") + assertIncorrectPadding(b"BEEA===", b"\t\x08") assertIncorrectPadding(b"BEEFC=", b"\t\x08Q") assertIncorrectPadding(b"BEEFC==", b"\t\x08Q") - assertDiscontinuousPadding(b"BE=EF===", b"\t\x08") - assertDiscontinuousPadding(b"BE==EF==", b"\t\x08") + assertDiscontinuousPadding(b"BE=EA===", b"\t\x08") + assertDiscontinuousPadding(b"BE==EA==", b"\t\x08") assertDiscontinuousPadding(b"BEEF=C==", b"\t\x08Q") - 
assertDiscontinuousPadding(b"BEEFC=AK", b"\t\x08Q\x01") + assertDiscontinuousPadding(b"BEEFC=AI", b"\t\x08Q\x01") assertInvalidLength(b"A") assertInvalidLength(b"ABC") @@ -847,10 +847,10 @@ def assertInvalidLength(*args): assertInvalidLength(b"B=E=====", b"\t") assertInvalidLength(b"B==E====", b"\t") - assertInvalidLength(b"BEE=F===", b"\t\x08") - assertInvalidLength(b"BEE==F==", b"\t\x08") - assertInvalidLength(b"BEEFCA=K", b"\t\x08Q\x01") - assertInvalidLength(b"BEEFCA=====K", b"\t\x08Q\x01") + assertInvalidLength(b"BEE=A===", b"\t\x08") + assertInvalidLength(b"BEE==A==", b"\t\x08") + assertInvalidLength(b"BEEFCA=I", b"\t\x08Q\x01") + assertInvalidLength(b"BEEFCA=====I", b"\t\x08Q\x01") def test_base32_nonzero_padding_bits(self): # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 diff --git a/Modules/binascii.c b/Modules/binascii.c index a57bf3ee6339f5..d016cb4f01c4c5 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -902,6 +902,16 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (strict_mode && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error_end; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); @@ -1652,6 +1662,16 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, goto error; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); From 615b2279cea420a52910e962ca22ca13730bf8fc Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Sun, 22 Mar 2026 15:15:21 -0700 Subject: [PATCH 3/3] Fix test_base64 test data with non-zero padding bits Co-Authored-By: Claude Opus 4.6 (1M context) --- Lib/test/test_base64.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 9648624b267a54..1b3e040a85952c 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -311,12 +311,12 @@ def test_b64decode_padding_error(self): def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. - tests = ((b'%3d==', b'\xdd', b'%$'), - (b'$3d==', b'\xdd', b'%$'), + tests = ((b'%3Q==', b'\xdd', b'%$'), + (b'$3Q==', b'\xdd', b'%$'), (b'[==', b'', b'[='), - (b'YW]3=', b'am', b']'), - (b'3{d==', b'\xdd', b'{}'), - (b'3d}==', b'\xdd', b'{}'), + (b'YW]0=', b'am', b']'), + (b'3{Q==', b'\xdd', b'{}'), + (b'3Q}==', b'\xdd', b'{}'), (b'@@', b'', b'@!'), (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'),