diff --git a/Lib/csv.py b/Lib/csv.py index 77f30c8d2b..cd20265987 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -1,28 +1,90 @@ -""" -csv.py - read/write/investigate CSV files +r""" +CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(",") is bound to fail. The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret spaces which + immediately follow a delimiter. It defaults to False, which + means that spaces immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating-point + numbers. + csv.QUOTE_STRINGS means that quotes are always placed around + fields which are strings. Note that the Python value None + is not a string. + csv.QUOTE_NOTNULL means that quotes are only placed around fields + that are not the Python value None. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes """ import re import types -from _csv import Error, __version__, writer, reader, register_dialect, \ +from _csv import Error, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - QUOTE_STRINGS, QUOTE_NOTNULL, \ - __doc__ + QUOTE_STRINGS, QUOTE_NOTNULL from _csv import Dialect as _Dialect from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", "QUOTE_STRINGS", "QUOTE_NOTNULL", - "Error", "Dialect", "__doc__", "excel", "excel_tab", + "Error", "Dialect", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", + "unregister_dialect", "DictReader", "DictWriter", "unix_dialect"] +__version__ = "1.0" + + class Dialect: """Describe a CSV dialect. @@ -51,8 +113,8 @@ def _validate(self): try: _Dialect(self) except TypeError as e: - # We do this for compatibility with py2.3 - raise Error(str(e)) + # Re-raise to get a traceback showing more user code. + raise Error(str(e)) from None class excel(Dialect): """Describe the usual properties of Excel-generated CSV files.""" diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 9a1743da6d..95cf51bf08 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -10,7 +10,7 @@ import gc import pickle from test import support -from test.support import warnings_helper, import_helper, check_disallow_instantiation +from test.support import import_helper, check_disallow_instantiation from itertools import permutations from textwrap import dedent from collections import OrderedDict @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase): in TestDialectRegistry. """ def _test_arg_valid(self, ctor, arg): + ctor(arg) self.assertRaises(TypeError, ctor) self.assertRaises(TypeError, ctor, None) - self.assertRaises(TypeError, ctor, arg, bad_attr = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') + self.assertRaises(TypeError, ctor, arg, bad_attr=0) + self.assertRaises(TypeError, ctor, arg, delimiter='') + self.assertRaises(TypeError, ctor, arg, escapechar='') + self.assertRaises(TypeError, ctor, arg, quotechar='') + self.assertRaises(TypeError, ctor, arg, delimiter='^^') + self.assertRaises(TypeError, ctor, arg, escapechar='^^') + self.assertRaises(TypeError, ctor, arg, quotechar='^^') self.assertRaises(csv.Error, ctor, arg, 'foo') self.assertRaises(TypeError, ctor, arg, delimiter=None) self.assertRaises(TypeError, ctor, arg, delimiter=1) + self.assertRaises(TypeError, ctor, arg, escapechar=1) self.assertRaises(TypeError, ctor, arg, quotechar=1) self.assertRaises(TypeError, ctor, arg, lineterminator=None) self.assertRaises(TypeError, ctor, arg, lineterminator=1) @@ -46,11 +52,48 @@ def _test_arg_valid(self, ctor, arg): quoting=csv.QUOTE_ALL, quotechar=None) self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_NONE, quotechar='') + self.assertRaises(ValueError, ctor, arg, delimiter='\n') + self.assertRaises(ValueError, ctor, arg, escapechar='\n') + self.assertRaises(ValueError, ctor, arg, quotechar='\n') + self.assertRaises(ValueError, ctor, arg, delimiter='\r') + self.assertRaises(ValueError, ctor, arg, escapechar='\r') + self.assertRaises(ValueError, ctor, arg, quotechar='\r') + ctor(arg, delimiter=' ') + ctor(arg, escapechar=' ') + ctor(arg, quotechar=' ') + ctor(arg, delimiter='\t', skipinitialspace=True) + ctor(arg, escapechar='\t', skipinitialspace=True) + ctor(arg, quotechar='\t', skipinitialspace=True) + ctor(arg, delimiter=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + escapechar=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + quotechar=' ', skipinitialspace=True) + ctor(arg, delimiter='^') + ctor(arg, escapechar='^') + ctor(arg, quotechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^') + self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^') + ctor(arg, delimiter='\x85') + ctor(arg, escapechar='\x85') + ctor(arg, quotechar='\x85') + ctor(arg, lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + delimiter='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + escapechar='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + quotechar='\x85', lineterminator='\x85') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) self.assertRaises(OSError, csv.reader, BadIterable()) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_writer_arg_valid(self): self._test_arg_valid(csv.writer, StringIO()) class BadWriter: @@ -150,13 +193,8 @@ def _write_error_test(self, exc, fields, **kwargs): fileobj.seek(0) self.assertEqual(fileobj.read(), '') - # TODO: RUSTPYTHON ''\r\n to ""\r\n unsupported - @unittest.expectedFailure def test_write_arg_valid(self): self._write_error_test(csv.Error, None) - self._write_test((), '') - self._write_test([None], '""') - self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE) # Check that exceptions are passed up the chain self._write_error_test(OSError, BadIterable()) class BadList: @@ -170,14 +208,13 @@ class BadItem: def __str__(self): raise OSError self._write_error_test(OSError, [BadItem()]) - def test_write_bigfield(self): # This exercises the buffer realloc functionality bigstring = 'X' * 50000 self._write_test([bigstring,bigstring], '%s,%s' % \ (bigstring, bigstring)) - # TODO: RUSTPYTHON quoting style check is unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_write_quoting(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"') @@ -196,7 +233,7 @@ def test_write_quoting(self): self._write_test(['a','',None,1], '"a","",,"1"', quoting = csv.QUOTE_NOTNULL) - # TODO: RUSTPYTHON doublequote check is unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_write_escape(self): self._write_test(['a',1,'p,q'], 'a,1,"p,q"', @@ -229,7 +266,7 @@ def test_write_escape(self): self._write_test(['C\\', '6', '7', 'X"'], 'C\\\\,6,7,"X"""', escapechar='\\', quoting=csv.QUOTE_MINIMAL) - # TODO: RUSTPYTHON lineterminator double char unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_write_lineterminator(self): for lineterminator in '\r\n', '\n', '\r', '!@#', '\0': @@ -238,11 +275,13 @@ def test_write_lineterminator(self): writer = csv.writer(sio, lineterminator=lineterminator) writer.writerow(['a', 'b']) writer.writerow([1, 2]) + writer.writerow(['\r', '\n']) self.assertEqual(sio.getvalue(), f'a,b{lineterminator}' - f'1,2{lineterminator}') + f'1,2{lineterminator}' + f'"\r","\n"{lineterminator}') - # TODO: RUSTPYTHON ''\r\n to ""\r\n unspported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_write_iterable(self): self._write_test(iter(['a', 1, 'p,q']), 'a,1,"p,q"') @@ -285,6 +324,53 @@ def test_writerows_with_none(self): fileobj.seek(0) self.assertEqual(fileobj.read(), 'a\r\n""\r\n') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_write_empty_fields(self): + self._write_test((), '') + self._write_test([''], '""') + self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE) + self._write_test([''], '""', quoting=csv.QUOTE_STRINGS) + self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL) + self._write_test([None], '""') + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS) + self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL) + self._write_test(['', ''], ',') + self._write_test([None, None], ',') + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_write_empty_fields_space_delimiter(self): + self._write_test([''], '""', delimiter=' ', skipinitialspace=False) + self._write_test([''], '""', delimiter=' ', skipinitialspace=True) + self._write_test([None], '""', delimiter=' ', skipinitialspace=False) + self._write_test([None], '""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False) + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True) + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False) + self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True) + + self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False, + quoting=csv.QUOTE_NONE) + self._write_error_test(csv.Error, ['', ''], + delimiter=' ', skipinitialspace=True, + quoting=csv.QUOTE_NONE) + for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL: + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False, + quoting=quoting) + self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True, + quoting=quoting) + + for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL: + self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False, + quoting=quoting) + self._write_error_test(csv.Error, [None, None], + delimiter=' ', skipinitialspace=True, + quoting=quoting) + def test_writerows_errors(self): with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: writer = csv.writer(fileobj) @@ -296,7 +382,7 @@ def _read_test(self, input, expect, **kwargs): result = list(reader) self.assertEqual(result, expect) - # TODO RUSTPYTHON strict mode is unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_oddinputs(self): self._read_test([], []) @@ -308,16 +394,23 @@ def test_read_oddinputs(self): self.assertRaises(csv.Error, self._read_test, [b'abc'], None) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_read_eol(self): - self._read_test(['a,b'], [['a','b']]) - self._read_test(['a,b\n'], [['a','b']]) - self._read_test(['a,b\r\n'], [['a','b']]) - self._read_test(['a,b\r'], [['a','b']]) - self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], []) - self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], []) - self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], []) - - # TODO RUSTPYTHON double quote umimplement + self._read_test(['a,b', 'c,d'], [['a','b'], ['c','d']]) + self._read_test(['a,b\n', 'c,d\n'], [['a','b'], ['c','d']]) + self._read_test(['a,b\r\n', 'c,d\r\n'], [['a','b'], ['c','d']]) + self._read_test(['a,b\r', 'c,d\r'], [['a','b'], ['c','d']]) + + errmsg = "with newline=''" + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\rc,d'])) + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\nc,d'])) + with self.assertRaisesRegex(csv.Error, errmsg): + next(csv.reader(['a,b\r\nc,d'])) + + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_eof(self): self._read_test(['a,"'], [['a', '']]) @@ -328,7 +421,7 @@ def test_read_eof(self): self.assertRaises(csv.Error, self._read_test, ['^'], [], escapechar='^', strict=True) - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_nul(self): self._read_test(['\0'], [['\0']]) @@ -342,7 +435,7 @@ def test_read_delimiter(self): self._read_test(['a;b;c'], [['a', 'b', 'c']], delimiter=';') self._read_test(['a\0b\0c'], [['a', 'b', 'c']], delimiter='\0') - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\') @@ -356,7 +449,7 @@ def test_read_escape(self): self._read_test(['a,\\b,c'], [['a', '\\b', 'c']], escapechar=None) self._read_test(['a,\\b,c'], [['a', '\\b', 'c']]) - # TODO RUSTPYTHON escapechar unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_quoting(self): self._read_test(['1,",3,",5'], [['1', ',3,', '5']]) @@ -367,17 +460,58 @@ def test_read_quoting(self): # will this fail where locale uses comma for decimals? self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]], quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',3,"5",7.3, 9'], [[None, '3', '5', '7.3', ' 9']], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',3,"5",7.3, 9'], [[None, 3, '5', 7.3, 9]], + quoting=csv.QUOTE_STRINGS) + + self._read_test([',,"",'], [['', '', '', '']]) + self._read_test([',,"",'], [['', '', '', '']], + quoting=csv.QUOTE_NONNUMERIC) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_NOTNULL) + self._read_test([',,"",'], [[None, None, '', None]], + quoting=csv.QUOTE_STRINGS) + self._read_test(['"a\nb", 7'], [['a\nb', ' 7']]) self.assertRaises(ValueError, self._read_test, ['abc,3'], [[]], quoting=csv.QUOTE_NONNUMERIC) + self.assertRaises(ValueError, self._read_test, + ['abc,3'], [[]], + quoting=csv.QUOTE_STRINGS) self._read_test(['1,@,3,@,5'], [['1', ',3,', '5']], quotechar='@') self._read_test(['1,\0,3,\0,5'], [['1', ',3,', '5']], quotechar='\0') + self._read_test(['1\\.5,\\.5,.5'], [[1.5, 0.5, 0.5]], + quoting=csv.QUOTE_NONNUMERIC, escapechar='\\') + self._read_test(['1\\.5,\\.5,"\\.5"'], [[1.5, 0.5, ".5"]], + quoting=csv.QUOTE_STRINGS, escapechar='\\') + # TODO: RUSTPYTHON; panic + @unittest.skip("TODO: RUSTPYTHON; slice index starts at 1 but ends at 0") def test_read_skipinitialspace(self): self._read_test(['no space, space, spaces,\ttab'], [['no space', 'space', 'spaces', '\ttab']], skipinitialspace=True) + self._read_test([' , , '], + [['', '', '']], + skipinitialspace=True) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_NOTNULL) + self._read_test([' , , '], + [[None, None, None]], + skipinitialspace=True, quoting=csv.QUOTE_STRINGS) + + # TODO: RUSTPYTHON + @unittest.expectedFailure + def test_read_space_delimiter(self): + self._read_test(['a b', ' a ', ' ', ''], + [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []], + delimiter=' ', skipinitialspace=False) + self._read_test(['a b', ' a ', ' ', ''], + [['a', 'b'], ['a', ''], [''], []], + delimiter=' ', skipinitialspace=True) def test_read_bigfield(self): # This exercises the buffer realloc functionality and field size @@ -410,27 +544,49 @@ def test_read_linenum(self): self.assertRaises(StopIteration, next, r) self.assertEqual(r.line_num, 3) - # TODO: RUSTPYTHON only '\r\n' unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_roundtrip_quoteed_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj) - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj)): - self.assertEqual(row, rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator) + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj)): + self.assertEqual(row, rows[i]) - # TODO: RUSTPYTHON only '\r\n' unsupported + # TODO: RUSTPYTHON @unittest.expectedFailure def test_roundtrip_escaped_unquoted_newlines(self): - with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: - writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\") - rows = [['a\nb','b'],['c','x\r\nd']] - writer.writerows(rows) - fileobj.seek(0) - for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")): - self.assertEqual(row,rows[i]) + rows = [ + ['\na', 'b\nc', 'd\n'], + ['\re', 'f\rg', 'h\r'], + ['\r\ni', 'j\r\nk', 'l\r\n'], + ['\n\rm', 'n\n\ro', 'p\n\r'], + ['\r\rq', 'r\r\rs', 't\r\r'], + ['\n\nu', 'v\n\nw', 'x\n\n'], + ] + for lineterminator in '\r\n', '\n', '\r': + with self.subTest(lineterminator=lineterminator): + with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: + writer = csv.writer(fileobj, lineterminator=lineterminator, + quoting=csv.QUOTE_NONE, escapechar="\\") + writer.writerows(rows) + fileobj.seek(0) + for i, row in enumerate(csv.reader(fileobj, + quoting=csv.QUOTE_NONE, + escapechar="\\")): + self.assertEqual(row, rows[i]) + class TestDialectRegistry(unittest.TestCase): def test_registry_badargs(self): @@ -509,10 +665,10 @@ class space(csv.excel): escapechar = "\\" with TemporaryFile("w+", encoding="utf-8") as fileobj: - fileobj.write("abc def\nc1ccccc1 benzene\n") + fileobj.write("abc def\nc1ccccc1 benzene\n") fileobj.seek(0) reader = csv.reader(fileobj, dialect=space()) - self.assertEqual(next(reader), ["abc", "def"]) + self.assertEqual(next(reader), ["abc", "", "", "def"]) self.assertEqual(next(reader), ["c1ccccc1", "benzene"]) def compare_dialect_123(self, expected, *writeargs, **kwwriteargs): @@ -556,14 +712,6 @@ class unspecified(): finally: csv.unregister_dialect('testC') - def test_bad_dialect(self): - # Unknown parameter - self.assertRaises(TypeError, csv.reader, [], bad_attr = 0) - # Bad values - self.assertRaises(TypeError, csv.reader, [], delimiter = None) - self.assertRaises(TypeError, csv.reader, [], quoting = -1) - self.assertRaises(TypeError, csv.reader, [], quoting = 100) - def test_copy(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) @@ -657,7 +805,7 @@ def test_quoted_quote(self): '"I see," said the blind man', 'as he picked up his hammer and saw']]) - # Rustpython TODO + # TODO: RUSTPYTHON @unittest.expectedFailure def test_quoted_nl(self): input = '''\ @@ -699,12 +847,12 @@ class EscapedExcel(csv.excel): class TestEscapedExcel(TestCsvBase): dialect = EscapedExcel() - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n') - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_escape_fieldsep(self): self.readerAssertEqual('abc\\,def\r\n', [['abc,def']]) @@ -712,7 +860,7 @@ def test_read_escape_fieldsep(self): class TestDialectUnix(TestCsvBase): dialect = 'unix' - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_simple_writer(self): self.writerAssertEqual([[1, 'abc def', 'abc']], '"1","abc def","abc"\n') @@ -730,7 +878,7 @@ class TestQuotedEscapedExcel(TestCsvBase): def test_write_escape_fieldsep(self): self.writerAssertEqual([['abc,def']], '"abc,def"\r\n') - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_escape_fieldsep(self): self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']]) @@ -928,7 +1076,7 @@ def test_read_multi(self): "s1": 'abc', "s2": 'def'}) - # TODO RUSTPYTHON + # TODO: RUSTPYTHON @unittest.expectedFailure def test_read_with_blanks(self): reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n", @@ -981,9 +1129,11 @@ def test_float_write(self): fileobj.seek(0) self.assertEqual(fileobj.read(), expected) + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_char_write(self): import array, string - a = array.array('u', string.ascii_letters) + a = array.array('w', string.ascii_letters) with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj: writer = csv.writer(fileobj, dialect="excel") @@ -1007,6 +1157,12 @@ class mydialect(csv.Dialect): mydialect.quoting = None self.assertRaises(csv.Error, mydialect) + mydialect.quoting = 42 + with self.assertRaises(csv.Error) as cm: + mydialect() + self.assertEqual(str(cm.exception), + 'bad "quoting" value') + mydialect.doublequote = True mydialect.quoting = csv.QUOTE_ALL mydialect.quotechar = '"' @@ -1122,11 +1278,18 @@ class mydialect(csv.Dialect): self.assertEqual(str(cm.exception), '"lineterminator" must be a string') + # TODO: RUSTPYTHON + @unittest.expectedFailure def test_invalid_chars(self): - def create_invalid(field_name, value): + def create_invalid(field_name, value, **kwargs): class mydialect(csv.Dialect): - pass + delimiter = ',' + quoting = csv.QUOTE_ALL + quotechar = '"' + lineterminator = '\r\n' setattr(mydialect, field_name, value) + for field_name, value in kwargs.items(): + setattr(mydialect, field_name, value) d = mydialect() for field_name in ("delimiter", "escapechar", "quotechar"): @@ -1135,6 +1298,11 @@ class mydialect(csv.Dialect): self.assertRaises(csv.Error, create_invalid, field_name, "abc") self.assertRaises(csv.Error, create_invalid, field_name, b'x') self.assertRaises(csv.Error, create_invalid, field_name, 5) + self.assertRaises(ValueError, create_invalid, field_name, "\n") + self.assertRaises(ValueError, create_invalid, field_name, "\r") + if field_name != "delimiter": + self.assertRaises(ValueError, create_invalid, field_name, " ", + skipinitialspace=True) class TestSniffer(unittest.TestCase): @@ -1451,8 +1619,7 @@ def test_ordered_dict_reader(self): class MiscTestCase(unittest.TestCase): def test__all__(self): - extra = {'__doc__', '__version__'} - support.check__all__(self, csv, ('csv', '_csv'), extra=extra) + support.check__all__(self, csv, ('csv', '_csv')) def test_subclassable(self): # issue 44089 diff --git a/stdlib/src/csv.rs b/stdlib/src/csv.rs index bded06e37f..5f6d249476 100644 --- a/stdlib/src/csv.rs +++ b/stdlib/src/csv.rs @@ -8,6 +8,7 @@ mod _csv { builtins::{PyBaseExceptionRef, PyInt, PyNone, PyStr, PyType, PyTypeError, PyTypeRef}, function::{ArgIterable, ArgumentError, FromArgs, FuncArgs, OptionalArg}, protocol::{PyIter, PyIterReturn}, + raise_if_stop, types::{Constructor, IterNext, Iterable, SelfIter}, }; use csv_core::Terminator; @@ -923,10 +924,7 @@ mod _csv { impl SelfIter for Reader {} impl IterNext for Reader { fn next(zelf: &Py, vm: &VirtualMachine) -> PyResult { - let string = match zelf.iter.next(vm)? { - PyIterReturn::Return(obj) => obj, - PyIterReturn::StopIteration(v) => return Ok(PyIterReturn::StopIteration(v)), - }; + let string = raise_if_stop!(zelf.iter.next(vm)?); let string = string.downcast::().map_err(|obj| { new_csv_error( vm,