PageRenderTime 948ms CodeModel.GetById 141ms app.highlight 620ms RepoModel.GetById 176ms app.codeStats 0ms

/Lib/test/test_csv.py

http://unladen-swallow.googlecode.com/
Python | 1030 lines | 1016 code | 5 blank | 9 comment | 2 complexity | cb795e56a265af25fc51851a189c25de MD5 | raw file
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2001,2002 Python Software Foundation
# csv package unit tests
   4
import sys
import os
import unittest
from StringIO import StringIO
import tempfile
import csv
import gc
from test import test_support
  13
  14class Test_Csv(unittest.TestCase):
  15    """
  16    Test the underlying C csv parser in ways that are not appropriate
  17    from the high level interface. Further tests of this nature are done
  18    in TestDialectRegistry.
  19    """
  20    def _test_arg_valid(self, ctor, arg):
  21        self.assertRaises(TypeError, ctor)
  22        self.assertRaises(TypeError, ctor, None)
  23        self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
  24        self.assertRaises(TypeError, ctor, arg, delimiter = 0)
  25        self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
  26        self.assertRaises(csv.Error, ctor, arg, 'foo')
  27        self.assertRaises(TypeError, ctor, arg, delimiter=None)
  28        self.assertRaises(TypeError, ctor, arg, delimiter=1)
  29        self.assertRaises(TypeError, ctor, arg, quotechar=1)
  30        self.assertRaises(TypeError, ctor, arg, lineterminator=None)
  31        self.assertRaises(TypeError, ctor, arg, lineterminator=1)
  32        self.assertRaises(TypeError, ctor, arg, quoting=None)
  33        self.assertRaises(TypeError, ctor, arg,
  34                          quoting=csv.QUOTE_ALL, quotechar='')
  35        self.assertRaises(TypeError, ctor, arg,
  36                          quoting=csv.QUOTE_ALL, quotechar=None)
  37
  38    def test_reader_arg_valid(self):
  39        self._test_arg_valid(csv.reader, [])
  40
  41    def test_writer_arg_valid(self):
  42        self._test_arg_valid(csv.writer, StringIO())
  43
  44    def _test_default_attrs(self, ctor, *args):
  45        obj = ctor(*args)
  46        # Check defaults
  47        self.assertEqual(obj.dialect.delimiter, ',')
  48        self.assertEqual(obj.dialect.doublequote, True)
  49        self.assertEqual(obj.dialect.escapechar, None)
  50        self.assertEqual(obj.dialect.lineterminator, "\r\n")
  51        self.assertEqual(obj.dialect.quotechar, '"')
  52        self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
  53        self.assertEqual(obj.dialect.skipinitialspace, False)
  54        self.assertEqual(obj.dialect.strict, False)
  55        # Try deleting or changing attributes (they are read-only)
  56        self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
  57        self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
  58        self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
  59        self.assertRaises(AttributeError, setattr, obj.dialect,
  60                          'quoting', None)
  61
  62    def test_reader_attrs(self):
  63        self._test_default_attrs(csv.reader, [])
  64
  65    def test_writer_attrs(self):
  66        self._test_default_attrs(csv.writer, StringIO())
  67
  68    def _test_kw_attrs(self, ctor, *args):
  69        # Now try with alternate options
  70        kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
  71                      lineterminator='\r', quotechar='*',
  72                      quoting=csv.QUOTE_NONE, skipinitialspace=True,
  73                      strict=True)
  74        obj = ctor(*args, **kwargs)
  75        self.assertEqual(obj.dialect.delimiter, ':')
  76        self.assertEqual(obj.dialect.doublequote, False)
  77        self.assertEqual(obj.dialect.escapechar, '\\')
  78        self.assertEqual(obj.dialect.lineterminator, "\r")
  79        self.assertEqual(obj.dialect.quotechar, '*')
  80        self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
  81        self.assertEqual(obj.dialect.skipinitialspace, True)
  82        self.assertEqual(obj.dialect.strict, True)
  83
  84    def test_reader_kw_attrs(self):
  85        self._test_kw_attrs(csv.reader, [])
  86
  87    def test_writer_kw_attrs(self):
  88        self._test_kw_attrs(csv.writer, StringIO())
  89
  90    def _test_dialect_attrs(self, ctor, *args):
  91        # Now try with dialect-derived options
  92        class dialect:
  93            delimiter='-'
  94            doublequote=False
  95            escapechar='^'
  96            lineterminator='$'
  97            quotechar='#'
  98            quoting=csv.QUOTE_ALL
  99            skipinitialspace=True
 100            strict=False
 101        args = args + (dialect,)
 102        obj = ctor(*args)
 103        self.assertEqual(obj.dialect.delimiter, '-')
 104        self.assertEqual(obj.dialect.doublequote, False)
 105        self.assertEqual(obj.dialect.escapechar, '^')
 106        self.assertEqual(obj.dialect.lineterminator, "$")
 107        self.assertEqual(obj.dialect.quotechar, '#')
 108        self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
 109        self.assertEqual(obj.dialect.skipinitialspace, True)
 110        self.assertEqual(obj.dialect.strict, False)
 111
 112    def test_reader_dialect_attrs(self):
 113        self._test_dialect_attrs(csv.reader, [])
 114
 115    def test_writer_dialect_attrs(self):
 116        self._test_dialect_attrs(csv.writer, StringIO())
 117
 118
 119    def _write_test(self, fields, expect, **kwargs):
 120        fd, name = tempfile.mkstemp()
 121        fileobj = os.fdopen(fd, "w+b")
 122        try:
 123            writer = csv.writer(fileobj, **kwargs)
 124            writer.writerow(fields)
 125            fileobj.seek(0)
 126            self.assertEqual(fileobj.read(),
 127                             expect + writer.dialect.lineterminator)
 128        finally:
 129            fileobj.close()
 130            os.unlink(name)
 131
 132    def test_write_arg_valid(self):
 133        self.assertRaises(csv.Error, self._write_test, None, '')
 134        self._write_test((), '')
 135        self._write_test([None], '""')
 136        self.assertRaises(csv.Error, self._write_test,
 137                          [None], None, quoting = csv.QUOTE_NONE)
 138        # Check that exceptions are passed up the chain
 139        class BadList:
 140            def __len__(self):
 141                return 10;
 142            def __getitem__(self, i):
 143                if i > 2:
 144                    raise IOError
 145        self.assertRaises(IOError, self._write_test, BadList(), '')
 146        class BadItem:
 147            def __str__(self):
 148                raise IOError
 149        self.assertRaises(IOError, self._write_test, [BadItem()], '')
 150
 151    def test_write_bigfield(self):
 152        # This exercises the buffer realloc functionality
 153        bigstring = 'X' * 50000
 154        self._write_test([bigstring,bigstring], '%s,%s' % \
 155                         (bigstring, bigstring))
 156
 157    def test_write_quoting(self):
 158        self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
 159        self.assertRaises(csv.Error,
 160                          self._write_test,
 161                          ['a',1,'p,q'], 'a,1,p,q',
 162                          quoting = csv.QUOTE_NONE)
 163        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
 164                         quoting = csv.QUOTE_MINIMAL)
 165        self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
 166                         quoting = csv.QUOTE_NONNUMERIC)
 167        self._write_test(['a',1,'p,q'], '"a","1","p,q"',
 168                         quoting = csv.QUOTE_ALL)
 169
 170    def test_write_escape(self):
 171        self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
 172                         escapechar='\\')
 173        self.assertRaises(csv.Error,
 174                          self._write_test,
 175                          ['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
 176                          escapechar=None, doublequote=False)
 177        self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
 178                         escapechar='\\', doublequote = False)
 179        self._write_test(['"'], '""""',
 180                         escapechar='\\', quoting = csv.QUOTE_MINIMAL)
 181        self._write_test(['"'], '\\"',
 182                         escapechar='\\', quoting = csv.QUOTE_MINIMAL,
 183                         doublequote = False)
 184        self._write_test(['"'], '\\"',
 185                         escapechar='\\', quoting = csv.QUOTE_NONE)
 186        self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
 187                         escapechar='\\', quoting = csv.QUOTE_NONE)
 188
 189    def test_writerows(self):
 190        class BrokenFile:
 191            def write(self, buf):
 192                raise IOError
 193        writer = csv.writer(BrokenFile())
 194        self.assertRaises(IOError, writer.writerows, [['a']])
 195        fd, name = tempfile.mkstemp()
 196        fileobj = os.fdopen(fd, "w+b")
 197        try:
 198            writer = csv.writer(fileobj)
 199            self.assertRaises(TypeError, writer.writerows, None)
 200            writer.writerows([['a','b'],['c','d']])
 201            fileobj.seek(0)
 202            self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
 203        finally:
 204            fileobj.close()
 205            os.unlink(name)
 206
 207    def _read_test(self, input, expect, **kwargs):
 208        reader = csv.reader(input, **kwargs)
 209        result = list(reader)
 210        self.assertEqual(result, expect)
 211
 212    def test_read_oddinputs(self):
 213        self._read_test([], [])
 214        self._read_test([''], [[]])
 215        self.assertRaises(csv.Error, self._read_test,
 216                          ['"ab"c'], None, strict = 1)
 217        # cannot handle null bytes for the moment
 218        self.assertRaises(csv.Error, self._read_test,
 219                          ['ab\0c'], None, strict = 1)
 220        self._read_test(['"ab"c'], [['abc']], doublequote = 0)
 221
 222    def test_read_eol(self):
 223        self._read_test(['a,b'], [['a','b']])
 224        self._read_test(['a,b\n'], [['a','b']])
 225        self._read_test(['a,b\r\n'], [['a','b']])
 226        self._read_test(['a,b\r'], [['a','b']])
 227        self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
 228        self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
 229        self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
 230
 231    def test_read_escape(self):
 232        self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
 233        self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
 234        self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
 235        self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
 236        self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
 237        self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
 238
 239    def test_read_quoting(self):
 240        self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
 241        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
 242                        quotechar=None, escapechar='\\')
 243        self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
 244                        quoting=csv.QUOTE_NONE, escapechar='\\')
 245        # will this fail where locale uses comma for decimals?
 246        self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
 247                        quoting=csv.QUOTE_NONNUMERIC)
 248        self.assertRaises(ValueError, self._read_test,
 249                          ['abc,3'], [[]],
 250                          quoting=csv.QUOTE_NONNUMERIC)
 251
 252    def test_read_bigfield(self):
 253        # This exercises the buffer realloc functionality and field size
 254        # limits.
 255        limit = csv.field_size_limit()
 256        try:
 257            size = 50000
 258            bigstring = 'X' * size
 259            bigline = '%s,%s' % (bigstring, bigstring)
 260            self._read_test([bigline], [[bigstring, bigstring]])
 261            csv.field_size_limit(size)
 262            self._read_test([bigline], [[bigstring, bigstring]])
 263            self.assertEqual(csv.field_size_limit(), size)
 264            csv.field_size_limit(size-1)
 265            self.assertRaises(csv.Error, self._read_test, [bigline], [])
 266            self.assertRaises(TypeError, csv.field_size_limit, None)
 267            self.assertRaises(TypeError, csv.field_size_limit, 1, None)
 268        finally:
 269            csv.field_size_limit(limit)
 270
 271    def test_read_linenum(self):
 272        for r in (csv.reader(['line,1', 'line,2', 'line,3']),
 273                  csv.DictReader(['line,1', 'line,2', 'line,3'],
 274                                 fieldnames=['a', 'b', 'c'])):
 275            self.assertEqual(r.line_num, 0)
 276            r.next()
 277            self.assertEqual(r.line_num, 1)
 278            r.next()
 279            self.assertEqual(r.line_num, 2)
 280            r.next()
 281            self.assertEqual(r.line_num, 3)
 282            self.assertRaises(StopIteration, r.next)
 283            self.assertEqual(r.line_num, 3)
 284
 285class TestDialectRegistry(unittest.TestCase):
 286    def test_registry_badargs(self):
 287        self.assertRaises(TypeError, csv.list_dialects, None)
 288        self.assertRaises(TypeError, csv.get_dialect)
 289        self.assertRaises(csv.Error, csv.get_dialect, None)
 290        self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
 291        self.assertRaises(TypeError, csv.unregister_dialect)
 292        self.assertRaises(csv.Error, csv.unregister_dialect, None)
 293        self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
 294        self.assertRaises(TypeError, csv.register_dialect, None)
 295        self.assertRaises(TypeError, csv.register_dialect, None, None)
 296        self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0)
 297        self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
 298                          badargument=None)
 299        self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
 300                          quoting=None)
 301        self.assertRaises(TypeError, csv.register_dialect, [])
 302
 303    def test_registry(self):
 304        class myexceltsv(csv.excel):
 305            delimiter = "\t"
 306        name = "myexceltsv"
 307        expected_dialects = csv.list_dialects() + [name]
 308        expected_dialects.sort()
 309        csv.register_dialect(name, myexceltsv)
 310        try:
 311            self.failUnless(csv.get_dialect(name).delimiter, '\t')
 312            got_dialects = csv.list_dialects()
 313            got_dialects.sort()
 314            self.assertEqual(expected_dialects, got_dialects)
 315        finally:
 316            csv.unregister_dialect(name)
 317
 318    def test_register_kwargs(self):
 319        name = 'fedcba'
 320        csv.register_dialect(name, delimiter=';')
 321        try:
 322            self.failUnless(csv.get_dialect(name).delimiter, '\t')
 323            self.failUnless(list(csv.reader('X;Y;Z', name)), ['X', 'Y', 'Z'])
 324        finally:
 325            csv.unregister_dialect(name)
 326
 327    def test_incomplete_dialect(self):
 328        class myexceltsv(csv.Dialect):
 329            delimiter = "\t"
 330        self.assertRaises(csv.Error, myexceltsv)
 331
 332    def test_space_dialect(self):
 333        class space(csv.excel):
 334            delimiter = " "
 335            quoting = csv.QUOTE_NONE
 336            escapechar = "\\"
 337
 338        fd, name = tempfile.mkstemp()
 339        fileobj = os.fdopen(fd, "w+b")
 340        try:
 341            fileobj.write("abc def\nc1ccccc1 benzene\n")
 342            fileobj.seek(0)
 343            rdr = csv.reader(fileobj, dialect=space())
 344            self.assertEqual(rdr.next(), ["abc", "def"])
 345            self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"])
 346        finally:
 347            fileobj.close()
 348            os.unlink(name)
 349
 350    def test_dialect_apply(self):
 351        class testA(csv.excel):
 352            delimiter = "\t"
 353        class testB(csv.excel):
 354            delimiter = ":"
 355        class testC(csv.excel):
 356            delimiter = "|"
 357
 358        csv.register_dialect('testC', testC)
 359        try:
 360            fd, name = tempfile.mkstemp()
 361            fileobj = os.fdopen(fd, "w+b")
 362            try:
 363                writer = csv.writer(fileobj)
 364                writer.writerow([1,2,3])
 365                fileobj.seek(0)
 366                self.assertEqual(fileobj.read(), "1,2,3\r\n")
 367            finally:
 368                fileobj.close()
 369                os.unlink(name)
 370
 371            fd, name = tempfile.mkstemp()
 372            fileobj = os.fdopen(fd, "w+b")
 373            try:
 374                writer = csv.writer(fileobj, testA)
 375                writer.writerow([1,2,3])
 376                fileobj.seek(0)
 377                self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
 378            finally:
 379                fileobj.close()
 380                os.unlink(name)
 381
 382            fd, name = tempfile.mkstemp()
 383            fileobj = os.fdopen(fd, "w+b")
 384            try:
 385                writer = csv.writer(fileobj, dialect=testB())
 386                writer.writerow([1,2,3])
 387                fileobj.seek(0)
 388                self.assertEqual(fileobj.read(), "1:2:3\r\n")
 389            finally:
 390                fileobj.close()
 391                os.unlink(name)
 392
 393            fd, name = tempfile.mkstemp()
 394            fileobj = os.fdopen(fd, "w+b")
 395            try:
 396                writer = csv.writer(fileobj, dialect='testC')
 397                writer.writerow([1,2,3])
 398                fileobj.seek(0)
 399                self.assertEqual(fileobj.read(), "1|2|3\r\n")
 400            finally:
 401                fileobj.close()
 402                os.unlink(name)
 403
 404            fd, name = tempfile.mkstemp()
 405            fileobj = os.fdopen(fd, "w+b")
 406            try:
 407                writer = csv.writer(fileobj, dialect=testA, delimiter=';')
 408                writer.writerow([1,2,3])
 409                fileobj.seek(0)
 410                self.assertEqual(fileobj.read(), "1;2;3\r\n")
 411            finally:
 412                fileobj.close()
 413                os.unlink(name)
 414
 415        finally:
 416            csv.unregister_dialect('testC')
 417
 418    def test_bad_dialect(self):
 419        # Unknown parameter
 420        self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
 421        # Bad values
 422        self.assertRaises(TypeError, csv.reader, [], delimiter = None)
 423        self.assertRaises(TypeError, csv.reader, [], quoting = -1)
 424        self.assertRaises(TypeError, csv.reader, [], quoting = 100)
 425
 426class TestCsvBase(unittest.TestCase):
 427    def readerAssertEqual(self, input, expected_result):
 428        fd, name = tempfile.mkstemp()
 429        fileobj = os.fdopen(fd, "w+b")
 430        try:
 431            fileobj.write(input)
 432            fileobj.seek(0)
 433            reader = csv.reader(fileobj, dialect = self.dialect)
 434            fields = list(reader)
 435            self.assertEqual(fields, expected_result)
 436        finally:
 437            fileobj.close()
 438            os.unlink(name)
 439
 440    def writerAssertEqual(self, input, expected_result):
 441        fd, name = tempfile.mkstemp()
 442        fileobj = os.fdopen(fd, "w+b")
 443        try:
 444            writer = csv.writer(fileobj, dialect = self.dialect)
 445            writer.writerows(input)
 446            fileobj.seek(0)
 447            self.assertEqual(fileobj.read(), expected_result)
 448        finally:
 449            fileobj.close()
 450            os.unlink(name)
 451
 452class TestDialectExcel(TestCsvBase):
 453    dialect = 'excel'
 454
 455    def test_single(self):
 456        self.readerAssertEqual('abc', [['abc']])
 457
 458    def test_simple(self):
 459        self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']])
 460
 461    def test_blankline(self):
 462        self.readerAssertEqual('', [])
 463
 464    def test_empty_fields(self):
 465        self.readerAssertEqual(',', [['', '']])
 466
 467    def test_singlequoted(self):
 468        self.readerAssertEqual('""', [['']])
 469
 470    def test_singlequoted_left_empty(self):
 471        self.readerAssertEqual('"",', [['','']])
 472
 473    def test_singlequoted_right_empty(self):
 474        self.readerAssertEqual(',""', [['','']])
 475
 476    def test_single_quoted_quote(self):
 477        self.readerAssertEqual('""""', [['"']])
 478
 479    def test_quoted_quotes(self):
 480        self.readerAssertEqual('""""""', [['""']])
 481
 482    def test_inline_quote(self):
 483        self.readerAssertEqual('a""b', [['a""b']])
 484
 485    def test_inline_quotes(self):
 486        self.readerAssertEqual('a"b"c', [['a"b"c']])
 487
 488    def test_quotes_and_more(self):
 489        # Excel would never write a field containing '"a"b', but when
 490        # reading one, it will return 'ab'.
 491        self.readerAssertEqual('"a"b', [['ab']])
 492
 493    def test_lone_quote(self):
 494        self.readerAssertEqual('a"b', [['a"b']])
 495
 496    def test_quote_and_quote(self):
 497        # Excel would never write a field containing '"a" "b"', but when
 498        # reading one, it will return 'a "b"'.
 499        self.readerAssertEqual('"a" "b"', [['a "b"']])
 500
 501    def test_space_and_quote(self):
 502        self.readerAssertEqual(' "a"', [[' "a"']])
 503
 504    def test_quoted(self):
 505        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
 506                               [['1', '2', '3',
 507                                 'I think, therefore I am',
 508                                 '5', '6']])
 509
 510    def test_quoted_quote(self):
 511        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
 512                               [['1', '2', '3',
 513                                 '"I see," said the blind man',
 514                                 'as he picked up his hammer and saw']])
 515
 516    def test_quoted_nl(self):
 517        input = '''\
 5181,2,3,"""I see,""
 519said the blind man","as he picked up his
 520hammer and saw"
 5219,8,7,6'''
 522        self.readerAssertEqual(input,
 523                               [['1', '2', '3',
 524                                   '"I see,"\nsaid the blind man',
 525                                   'as he picked up his\nhammer and saw'],
 526                                ['9','8','7','6']])
 527
 528    def test_dubious_quote(self):
 529        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])
 530
 531    def test_null(self):
 532        self.writerAssertEqual([], '')
 533
 534    def test_single(self):
 535        self.writerAssertEqual([['abc']], 'abc\r\n')
 536
 537    def test_simple(self):
 538        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')
 539
 540    def test_quotes(self):
 541        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')
 542
 543    def test_quote_fieldsep(self):
 544        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')
 545
 546    def test_newlines(self):
 547        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')
 548
 549class EscapedExcel(csv.excel):
 550    quoting = csv.QUOTE_NONE
 551    escapechar = '\\'
 552
 553class TestEscapedExcel(TestCsvBase):
 554    dialect = EscapedExcel()
 555
 556    def test_escape_fieldsep(self):
 557        self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n')
 558
 559    def test_read_escape_fieldsep(self):
 560        self.readerAssertEqual('abc\\,def\r\n', [['abc,def']])
 561
 562class QuotedEscapedExcel(csv.excel):
 563    quoting = csv.QUOTE_NONNUMERIC
 564    escapechar = '\\'
 565
 566class TestQuotedEscapedExcel(TestCsvBase):
 567    dialect = QuotedEscapedExcel()
 568
 569    def test_write_escape_fieldsep(self):
 570        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')
 571
 572    def test_read_escape_fieldsep(self):
 573        self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']])
 574
 575class TestDictFields(unittest.TestCase):
 576    ### "long" means the row is longer than the number of fieldnames
 577    ### "short" means there are fewer elements in the row than fieldnames
 578    def test_write_simple_dict(self):
 579        fd, name = tempfile.mkstemp()
 580        fileobj = os.fdopen(fd, "w+b")
 581        try:
 582            writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
 583            writer.writerow({"f1": 10, "f3": "abc"})
 584            fileobj.seek(0)
 585            self.assertEqual(fileobj.read(), "10,,abc\r\n")
 586        finally:
 587            fileobj.close()
 588            os.unlink(name)
 589
 590    def test_write_no_fields(self):
 591        fileobj = StringIO()
 592        self.assertRaises(TypeError, csv.DictWriter, fileobj)
 593
 594    def test_read_dict_fields(self):
 595        fd, name = tempfile.mkstemp()
 596        fileobj = os.fdopen(fd, "w+b")
 597        try:
 598            fileobj.write("1,2,abc\r\n")
 599            fileobj.seek(0)
 600            reader = csv.DictReader(fileobj,
 601                                    fieldnames=["f1", "f2", "f3"])
 602            self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
 603        finally:
 604            fileobj.close()
 605            os.unlink(name)
 606
 607    def test_read_dict_no_fieldnames(self):
 608        fd, name = tempfile.mkstemp()
 609        fileobj = os.fdopen(fd, "w+b")
 610        try:
 611            fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
 612            fileobj.seek(0)
 613            reader = csv.DictReader(fileobj)
 614            self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
 615            self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
 616        finally:
 617            fileobj.close()
 618            os.unlink(name)
 619
 620    # Two test cases to make sure existing ways of implicitly setting
 621    # fieldnames continue to work.  Both arise from discussion in issue3436.
 622    def test_read_dict_fieldnames_from_file(self):
 623        fd, name = tempfile.mkstemp()
 624        f = os.fdopen(fd, "w+b")
 625        try:
 626            f.write("f1,f2,f3\r\n1,2,abc\r\n")
 627            f.seek(0)
 628            reader = csv.DictReader(f, fieldnames=csv.reader(f).next())
 629            self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
 630            self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
 631        finally:
 632            f.close()
 633            os.unlink(name)
 634
 635    def test_read_dict_fieldnames_chain(self):
 636        import itertools
 637        fd, name = tempfile.mkstemp()
 638        f = os.fdopen(fd, "w+b")
 639        try:
 640            f.write("f1,f2,f3\r\n1,2,abc\r\n")
 641            f.seek(0)
 642            reader = csv.DictReader(f)
 643            first = next(reader)
 644            for row in itertools.chain([first], reader):
 645                self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
 646                self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})
 647        finally:
 648            f.close()
 649            os.unlink(name)
 650
 651    def test_read_long(self):
 652        fd, name = tempfile.mkstemp()
 653        fileobj = os.fdopen(fd, "w+b")
 654        try:
 655            fileobj.write("1,2,abc,4,5,6\r\n")
 656            fileobj.seek(0)
 657            reader = csv.DictReader(fileobj,
 658                                    fieldnames=["f1", "f2"])
 659            self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
 660                                             None: ["abc", "4", "5", "6"]})
 661        finally:
 662            fileobj.close()
 663            os.unlink(name)
 664
 665    def test_read_long_with_rest(self):
 666        fd, name = tempfile.mkstemp()
 667        fileobj = os.fdopen(fd, "w+b")
 668        try:
 669            fileobj.write("1,2,abc,4,5,6\r\n")
 670            fileobj.seek(0)
 671            reader = csv.DictReader(fileobj,
 672                                    fieldnames=["f1", "f2"], restkey="_rest")
 673            self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
 674                                             "_rest": ["abc", "4", "5", "6"]})
 675        finally:
 676            fileobj.close()
 677            os.unlink(name)
 678
 679    def test_read_long_with_rest_no_fieldnames(self):
 680        fd, name = tempfile.mkstemp()
 681        fileobj = os.fdopen(fd, "w+b")
 682        try:
 683            fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
 684            fileobj.seek(0)
 685            reader = csv.DictReader(fileobj, restkey="_rest")
 686            self.assertEqual(reader.fieldnames, ["f1", "f2"])
 687            self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
 688                                             "_rest": ["abc", "4", "5", "6"]})
 689        finally:
 690            fileobj.close()
 691            os.unlink(name)
 692
 693    def test_read_short(self):
 694        fd, name = tempfile.mkstemp()
 695        fileobj = os.fdopen(fd, "w+b")
 696        try:
 697            fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
 698            fileobj.seek(0)
 699            reader = csv.DictReader(fileobj,
 700                                    fieldnames="1 2 3 4 5 6".split(),
 701                                    restval="DEFAULT")
 702            self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
 703                                             "4": '4', "5": '5', "6": '6'})
 704            self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
 705                                             "4": 'DEFAULT', "5": 'DEFAULT',
 706                                             "6": 'DEFAULT'})
 707        finally:
 708            fileobj.close()
 709            os.unlink(name)
 710
 711    def test_read_multi(self):
 712        sample = [
 713            '2147483648,43.0e12,17,abc,def\r\n',
 714            '147483648,43.0e2,17,abc,def\r\n',
 715            '47483648,43.0,170,abc,def\r\n'
 716            ]
 717
 718        reader = csv.DictReader(sample,
 719                                fieldnames="i1 float i2 s1 s2".split())
 720        self.assertEqual(reader.next(), {"i1": '2147483648',
 721                                         "float": '43.0e12',
 722                                         "i2": '17',
 723                                         "s1": 'abc',
 724                                         "s2": 'def'})
 725
 726    def test_read_with_blanks(self):
 727        reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
 728                                 "1,2,abc,4,5,6\r\n"],
 729                                fieldnames="1 2 3 4 5 6".split())
 730        self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
 731                                         "4": '4', "5": '5', "6": '6'})
 732        self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
 733                                         "4": '4', "5": '5', "6": '6'})
 734
 735    def test_read_semi_sep(self):
 736        reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
 737                                fieldnames="1 2 3 4 5 6".split(),
 738                                delimiter=';')
 739        self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
 740                                         "4": '4', "5": '5', "6": '6'})
 741
 742class TestArrayWrites(unittest.TestCase):
 743    def test_int_write(self):
 744        import array
 745        contents = [(20-i) for i in range(20)]
 746        a = array.array('i', contents)
 747
 748        fd, name = tempfile.mkstemp()
 749        fileobj = os.fdopen(fd, "w+b")
 750        try:
 751            writer = csv.writer(fileobj, dialect="excel")
 752            writer.writerow(a)
 753            expected = ",".join([str(i) for i in a])+"\r\n"
 754            fileobj.seek(0)
 755            self.assertEqual(fileobj.read(), expected)
 756        finally:
 757            fileobj.close()
 758            os.unlink(name)
 759
 760    def test_double_write(self):
 761        import array
 762        contents = [(20-i)*0.1 for i in range(20)]
 763        a = array.array('d', contents)
 764        fd, name = tempfile.mkstemp()
 765        fileobj = os.fdopen(fd, "w+b")
 766        try:
 767            writer = csv.writer(fileobj, dialect="excel")
 768            writer.writerow(a)
 769            expected = ",".join([str(i) for i in a])+"\r\n"
 770            fileobj.seek(0)
 771            self.assertEqual(fileobj.read(), expected)
 772        finally:
 773            fileobj.close()
 774            os.unlink(name)
 775
 776    def test_float_write(self):
 777        import array
 778        contents = [(20-i)*0.1 for i in range(20)]
 779        a = array.array('f', contents)
 780        fd, name = tempfile.mkstemp()
 781        fileobj = os.fdopen(fd, "w+b")
 782        try:
 783            writer = csv.writer(fileobj, dialect="excel")
 784            writer.writerow(a)
 785            expected = ",".join([str(i) for i in a])+"\r\n"
 786            fileobj.seek(0)
 787            self.assertEqual(fileobj.read(), expected)
 788        finally:
 789            fileobj.close()
 790            os.unlink(name)
 791
 792    def test_char_write(self):
 793        import array, string
 794        a = array.array('c', string.letters)
 795        fd, name = tempfile.mkstemp()
 796        fileobj = os.fdopen(fd, "w+b")
 797        try:
 798            writer = csv.writer(fileobj, dialect="excel")
 799            writer.writerow(a)
 800            expected = ",".join(a)+"\r\n"
 801            fileobj.seek(0)
 802            self.assertEqual(fileobj.read(), expected)
 803        finally:
 804            fileobj.close()
 805            os.unlink(name)
 806
 807class TestDialectValidity(unittest.TestCase):
 808    def test_quoting(self):
 809        class mydialect(csv.Dialect):
 810            delimiter = ";"
 811            escapechar = '\\'
 812            doublequote = False
 813            skipinitialspace = True
 814            lineterminator = '\r\n'
 815            quoting = csv.QUOTE_NONE
 816        d = mydialect()
 817
 818        mydialect.quoting = None
 819        self.assertRaises(csv.Error, mydialect)
 820
 821        mydialect.doublequote = True
 822        mydialect.quoting = csv.QUOTE_ALL
 823        mydialect.quotechar = '"'
 824        d = mydialect()
 825
 826        mydialect.quotechar = "''"
 827        self.assertRaises(csv.Error, mydialect)
 828
 829        mydialect.quotechar = 4
 830        self.assertRaises(csv.Error, mydialect)
 831
 832    def test_delimiter(self):
 833        class mydialect(csv.Dialect):
 834            delimiter = ";"
 835            escapechar = '\\'
 836            doublequote = False
 837            skipinitialspace = True
 838            lineterminator = '\r\n'
 839            quoting = csv.QUOTE_NONE
 840        d = mydialect()
 841
 842        mydialect.delimiter = ":::"
 843        self.assertRaises(csv.Error, mydialect)
 844
 845        mydialect.delimiter = 4
 846        self.assertRaises(csv.Error, mydialect)
 847
 848    def test_lineterminator(self):
 849        class mydialect(csv.Dialect):
 850            delimiter = ";"
 851            escapechar = '\\'
 852            doublequote = False
 853            skipinitialspace = True
 854            lineterminator = '\r\n'
 855            quoting = csv.QUOTE_NONE
 856        d = mydialect()
 857
 858        mydialect.lineterminator = ":::"
 859        d = mydialect()
 860
 861        mydialect.lineterminator = 4
 862        self.assertRaises(csv.Error, mydialect)
 863
 864
 865class TestSniffer(unittest.TestCase):
 866    sample1 = """\
 867Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
 868Shark City, Glendale Heights, IL, 12/28/02, Prezence
 869Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
 870Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
 871"""
 872    sample2 = """\
 873'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
 874'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
 875'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
 876'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
 877"""
 878    header = '''\
 879"venue","city","state","date","performers"
 880'''
 881    sample3 = '''\
 88205/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
 88305/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
 88405/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
 885'''
 886
 887    sample4 = '''\
 8882147483648;43.0e12;17;abc;def
 889147483648;43.0e2;17;abc;def
 89047483648;43.0;170;abc;def
 891'''
 892
 893    sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
 894    sample6 = "a|b|c\r\nd|e|f\r\n"
 895    sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
 896
 897    def test_has_header(self):
 898        sniffer = csv.Sniffer()
 899        self.assertEqual(sniffer.has_header(self.sample1), False)
 900        self.assertEqual(sniffer.has_header(self.header+self.sample1), True)
 901
 902    def test_sniff(self):
 903        sniffer = csv.Sniffer()
 904        dialect = sniffer.sniff(self.sample1)
 905        self.assertEqual(dialect.delimiter, ",")
 906        self.assertEqual(dialect.quotechar, '"')
 907        self.assertEqual(dialect.skipinitialspace, True)
 908
 909        dialect = sniffer.sniff(self.sample2)
 910        self.assertEqual(dialect.delimiter, ":")
 911        self.assertEqual(dialect.quotechar, "'")
 912        self.assertEqual(dialect.skipinitialspace, False)
 913
 914    def test_delimiters(self):
 915        sniffer = csv.Sniffer()
 916        dialect = sniffer.sniff(self.sample3)
 917        # given that all three lines in sample3 are equal,
 918        # I think that any character could have been 'guessed' as the
 919        # delimiter, depending on dictionary order
 920        self.assert_(dialect.delimiter in self.sample3)
 921        dialect = sniffer.sniff(self.sample3, delimiters="?,")
 922        self.assertEqual(dialect.delimiter, "?")
 923        dialect = sniffer.sniff(self.sample3, delimiters="/,")
 924        self.assertEqual(dialect.delimiter, "/")
 925        dialect = sniffer.sniff(self.sample4)
 926        self.assertEqual(dialect.delimiter, ";")
 927        dialect = sniffer.sniff(self.sample5)
 928        self.assertEqual(dialect.delimiter, "\t")
 929        dialect = sniffer.sniff(self.sample6)
 930        self.assertEqual(dialect.delimiter, "|")
 931        dialect = sniffer.sniff(self.sample7)
 932        self.assertEqual(dialect.delimiter, "|")
 933        self.assertEqual(dialect.quotechar, "'")
 934
# The leak tests below are built on sys.gettotalrefcount(); when the running
# interpreter does not provide that function they are not defined at all
# (and a note is printed in verbose mode).
if not hasattr(sys, "gettotalrefcount"):
    if test_support.verbose: print "*** skipping leakage tests ***"
else:
    class NUL:
        # Sink object handed to csv.writer: accepts and discards whatever
        # is written.  `s` plays the role of `self`.
        def write(s, *args):
            pass
        writelines = write

    class TestLeaks(unittest.TestCase):
        # Every test uses the same measurement loop:
        #   * `rc` is sampled at the top of each pass, after gc.collect()
        #     and before that pass creates any csv objects;
        #   * `delta = rc - lastrc` is therefore the change in the total
        #     reference count between the start of the previous pass and
        #     the start of the current one;
        #   * only the value of `delta` from the final pass is asserted on,
        #     after the loop has finished.
        # Each pass also checks that the collector has left nothing in
        # gc.garbage.

        def test_create_read(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # Three readers are created and immediately dropped.
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                delta = rc-lastrc
                lastrc = rc
            # if csv.reader() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_create_write(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            s = NUL()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # Three writers are created and immediately dropped.
                csv.writer(s)
                csv.writer(s)
                csv.writer(s)
                delta = rc-lastrc
                lastrc = rc
            # if csv.writer() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_read(self):
            delta = 0
            rows = ["a,b,c\r\n"]*5
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # One reader per pass, consumed completely (five rows).
                rdr = csv.reader(rows)
                for row in rdr:
                    pass
                delta = rc-lastrc
                lastrc = rc
            # if reader leaks during read, last delta should be 5 or more
            self.assertEqual(delta < 5, True)

        def test_write(self):
            delta = 0
            rows = [[1,2,3]]*5
            s = NUL()
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # One writer per pass, used to write five rows to the sink.
                writer = csv.writer(s)
                for row in rows:
                    writer.writerow(row)
                delta = rc-lastrc
                lastrc = rc
            # if writer leaks during write, last delta should be 5 or more
            self.assertEqual(delta < 5, True)
1007
1008# commented out for now - csv module doesn't yet support Unicode
1009## class TestUnicode(unittest.TestCase):
1010##     def test_unicode_read(self):
1011##         import codecs
1012##         f = codecs.EncodedFile(StringIO("Martin von Löwis,"
1013##                                         "Marc André Lemburg,"
1014##                                         "Guido van Rossum,"
1015##                                         "François Pinard\r\n"),
1016##                                data_encoding='iso-8859-1')
1017##         reader = csv.reader(f)
1018##         self.assertEqual(list(reader), [[u"Martin von Löwis",
1019##                                          u"Marc André Lemburg",
1020##                                          u"Guido van Rossum",
##                                          u"François Pinard"]])
1022
def test_main():
    # Collect every module-level object whose name starts with 'Test' and
    # hand them all to test_support.run_unittest().  Test classes that are
    # only defined conditionally are picked up automatically when present.
    this_module = sys.modules[__name__]
    test_classes = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            test_classes.append(getattr(this_module, attr_name))
    test_support.run_unittest(*test_classes)
1028
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    test_main()