PageRenderTime 666ms CodeModel.GetById 23ms app.highlight 37ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/test/test_textwrap.py

http://unladen-swallow.googlecode.com/
Python | 609 lines | 579 code | 18 blank | 12 comment | 19 complexity | 9fc216fd4d151ffa6b3428b21ab9e3de MD5 | raw file
  1#
  2# Test suite for the textwrap module.
  3#
  4# Original tests written by Greg Ward <gward@python.net>.
  5# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
  6# Currently maintained by Greg Ward.
  7#
  8# $Id: test_textwrap.py 67896 2008-12-21 17:01:26Z benjamin.peterson $
  9#
 10
 11import unittest
 12from test import test_support
 13
 14from textwrap import TextWrapper, wrap, fill, dedent
 15
 16
 17class BaseTestCase(unittest.TestCase):
 18    '''Parent class with utility methods for textwrap tests.'''
 19
 20    def show(self, textin):
 21        if isinstance(textin, list):
 22            result = []
 23            for i in range(len(textin)):
 24                result.append("  %d: %r" % (i, textin[i]))
 25            result = '\n'.join(result)
 26        elif isinstance(textin, basestring):
 27            result = "  %s\n" % repr(textin)
 28        return result
 29
 30
 31    def check(self, result, expect):
 32        self.assertEquals(result, expect,
 33            'expected:\n%s\nbut got:\n%s' % (
 34                self.show(expect), self.show(result)))
 35
 36    def check_wrap(self, text, width, expect, **kwargs):
 37        result = wrap(text, width, **kwargs)
 38        self.check(result, expect)
 39
 40    def check_split(self, text, expect):
 41        result = self.wrapper._split(text)
 42        self.assertEquals(result, expect,
 43                          "\nexpected %r\n"
 44                          "but got  %r" % (expect, result))
 45
 46
 47class WrapTestCase(BaseTestCase):
 48
 49    def setUp(self):
 50        self.wrapper = TextWrapper(width=45)
 51
 52    def test_simple(self):
 53        # Simple case: just words, spaces, and a bit of punctuation
 54
 55        text = "Hello there, how are you this fine day?  I'm glad to hear it!"
 56
 57        self.check_wrap(text, 12,
 58                        ["Hello there,",
 59                         "how are you",
 60                         "this fine",
 61                         "day?  I'm",
 62                         "glad to hear",
 63                         "it!"])
 64        self.check_wrap(text, 42,
 65                        ["Hello there, how are you this fine day?",
 66                         "I'm glad to hear it!"])
 67        self.check_wrap(text, 80, [text])
 68
 69
 70    def test_whitespace(self):
 71        # Whitespace munging and end-of-sentence detection
 72
 73        text = """\
 74This is a paragraph that already has
 75line breaks.  But some of its lines are much longer than the others,
 76so it needs to be wrapped.
 77Some lines are \ttabbed too.
 78What a mess!
 79"""
 80
 81        expect = ["This is a paragraph that already has line",
 82                  "breaks.  But some of its lines are much",
 83                  "longer than the others, so it needs to be",
 84                  "wrapped.  Some lines are  tabbed too.  What a",
 85                  "mess!"]
 86
 87        wrapper = TextWrapper(45, fix_sentence_endings=True)
 88        result = wrapper.wrap(text)
 89        self.check(result, expect)
 90
 91        result = wrapper.fill(text)
 92        self.check(result, '\n'.join(expect))
 93
 94    def test_fix_sentence_endings(self):
 95        wrapper = TextWrapper(60, fix_sentence_endings=True)
 96
 97        # SF #847346: ensure that fix_sentence_endings=True does the
 98        # right thing even on input short enough that it doesn't need to
 99        # be wrapped.
100        text = "A short line. Note the single space."
101        expect = ["A short line.  Note the single space."]
102        self.check(wrapper.wrap(text), expect)
103
104        # Test some of the hairy end cases that _fix_sentence_endings()
105        # is supposed to handle (the easy stuff is tested in
106        # test_whitespace() above).
107        text = "Well, Doctor? What do you think?"
108        expect = ["Well, Doctor?  What do you think?"]
109        self.check(wrapper.wrap(text), expect)
110
111        text = "Well, Doctor?\nWhat do you think?"
112        self.check(wrapper.wrap(text), expect)
113
114        text = 'I say, chaps! Anyone for "tennis?"\nHmmph!'
115        expect = ['I say, chaps!  Anyone for "tennis?"  Hmmph!']
116        self.check(wrapper.wrap(text), expect)
117
118        wrapper.width = 20
119        expect = ['I say, chaps!', 'Anyone for "tennis?"', 'Hmmph!']
120        self.check(wrapper.wrap(text), expect)
121
122        text = 'And she said, "Go to hell!"\nCan you believe that?'
123        expect = ['And she said, "Go to',
124                  'hell!"  Can you',
125                  'believe that?']
126        self.check(wrapper.wrap(text), expect)
127
128        wrapper.width = 60
129        expect = ['And she said, "Go to hell!"  Can you believe that?']
130        self.check(wrapper.wrap(text), expect)
131
132        text = 'File stdio.h is nice.'
133        expect = ['File stdio.h is nice.']
134        self.check(wrapper.wrap(text), expect)
135
136    def test_wrap_short(self):
137        # Wrapping to make short lines longer
138
139        text = "This is a\nshort paragraph."
140
141        self.check_wrap(text, 20, ["This is a short",
142                                   "paragraph."])
143        self.check_wrap(text, 40, ["This is a short paragraph."])
144
145
146    def test_wrap_short_1line(self):
147        # Test endcases
148
149        text = "This is a short line."
150
151        self.check_wrap(text, 30, ["This is a short line."])
152        self.check_wrap(text, 30, ["(1) This is a short line."],
153                        initial_indent="(1) ")
154
155
156    def test_hyphenated(self):
157        # Test breaking hyphenated words
158
159        text = ("this-is-a-useful-feature-for-"
160                "reformatting-posts-from-tim-peters'ly")
161
162        self.check_wrap(text, 40,
163                        ["this-is-a-useful-feature-for-",
164                         "reformatting-posts-from-tim-peters'ly"])
165        self.check_wrap(text, 41,
166                        ["this-is-a-useful-feature-for-",
167                         "reformatting-posts-from-tim-peters'ly"])
168        self.check_wrap(text, 42,
169                        ["this-is-a-useful-feature-for-reformatting-",
170                         "posts-from-tim-peters'ly"])
171
172    def test_hyphenated_numbers(self):
173        # Test that hyphenated numbers (eg. dates) are not broken like words.
174        text = ("Python 1.0.0 was released on 1994-01-26.  Python 1.0.1 was\n"
175                "released on 1994-02-15.")
176
177        self.check_wrap(text, 35, ['Python 1.0.0 was released on',
178                                   '1994-01-26.  Python 1.0.1 was',
179                                   'released on 1994-02-15.'])
180        self.check_wrap(text, 40, ['Python 1.0.0 was released on 1994-01-26.',
181                                   'Python 1.0.1 was released on 1994-02-15.'])
182
183        text = "I do all my shopping at 7-11."
184        self.check_wrap(text, 25, ["I do all my shopping at",
185                                   "7-11."])
186        self.check_wrap(text, 27, ["I do all my shopping at",
187                                   "7-11."])
188        self.check_wrap(text, 29, ["I do all my shopping at 7-11."])
189
190    def test_em_dash(self):
191        # Test text with em-dashes
192        text = "Em-dashes should be written -- thus."
193        self.check_wrap(text, 25,
194                        ["Em-dashes should be",
195                         "written -- thus."])
196
197        # Probe the boundaries of the properly written em-dash,
198        # ie. " -- ".
199        self.check_wrap(text, 29,
200                        ["Em-dashes should be written",
201                         "-- thus."])
202        expect = ["Em-dashes should be written --",
203                  "thus."]
204        self.check_wrap(text, 30, expect)
205        self.check_wrap(text, 35, expect)
206        self.check_wrap(text, 36,
207                        ["Em-dashes should be written -- thus."])
208
209        # The improperly written em-dash is handled too, because
210        # it's adjacent to non-whitespace on both sides.
211        text = "You can also do--this or even---this."
212        expect = ["You can also do",
213                  "--this or even",
214                  "---this."]
215        self.check_wrap(text, 15, expect)
216        self.check_wrap(text, 16, expect)
217        expect = ["You can also do--",
218                  "this or even---",
219                  "this."]
220        self.check_wrap(text, 17, expect)
221        self.check_wrap(text, 19, expect)
222        expect = ["You can also do--this or even",
223                  "---this."]
224        self.check_wrap(text, 29, expect)
225        self.check_wrap(text, 31, expect)
226        expect = ["You can also do--this or even---",
227                  "this."]
228        self.check_wrap(text, 32, expect)
229        self.check_wrap(text, 35, expect)
230
231        # All of the above behaviour could be deduced by probing the
232        # _split() method.
233        text = "Here's an -- em-dash and--here's another---and another!"
234        expect = ["Here's", " ", "an", " ", "--", " ", "em-", "dash", " ",
235                  "and", "--", "here's", " ", "another", "---",
236                  "and", " ", "another!"]
237        self.check_split(text, expect)
238
239        text = "and then--bam!--he was gone"
240        expect = ["and", " ", "then", "--", "bam!", "--",
241                  "he", " ", "was", " ", "gone"]
242        self.check_split(text, expect)
243
244
245    def test_unix_options (self):
246        # Test that Unix-style command-line options are wrapped correctly.
247        # Both Optik (OptionParser) and Docutils rely on this behaviour!
248
249        text = "You should use the -n option, or --dry-run in its long form."
250        self.check_wrap(text, 20,
251                        ["You should use the",
252                         "-n option, or --dry-",
253                         "run in its long",
254                         "form."])
255        self.check_wrap(text, 21,
256                        ["You should use the -n",
257                         "option, or --dry-run",
258                         "in its long form."])
259        expect = ["You should use the -n option, or",
260                  "--dry-run in its long form."]
261        self.check_wrap(text, 32, expect)
262        self.check_wrap(text, 34, expect)
263        self.check_wrap(text, 35, expect)
264        self.check_wrap(text, 38, expect)
265        expect = ["You should use the -n option, or --dry-",
266                  "run in its long form."]
267        self.check_wrap(text, 39, expect)
268        self.check_wrap(text, 41, expect)
269        expect = ["You should use the -n option, or --dry-run",
270                  "in its long form."]
271        self.check_wrap(text, 42, expect)
272
273        # Again, all of the above can be deduced from _split().
274        text = "the -n option, or --dry-run or --dryrun"
275        expect = ["the", " ", "-n", " ", "option,", " ", "or", " ",
276                  "--dry-", "run", " ", "or", " ", "--dryrun"]
277        self.check_split(text, expect)
278
279    def test_funky_hyphens (self):
280        # Screwy edge cases cooked up by David Goodger.  All reported
281        # in SF bug #596434.
282        self.check_split("what the--hey!", ["what", " ", "the", "--", "hey!"])
283        self.check_split("what the--", ["what", " ", "the--"])
284        self.check_split("what the--.", ["what", " ", "the--."])
285        self.check_split("--text--.", ["--text--."])
286
287        # When I first read bug #596434, this is what I thought David
288        # was talking about.  I was wrong; these have always worked
289        # fine.  The real problem is tested in test_funky_parens()
290        # below...
291        self.check_split("--option", ["--option"])
292        self.check_split("--option-opt", ["--option-", "opt"])
293        self.check_split("foo --option-opt bar",
294                         ["foo", " ", "--option-", "opt", " ", "bar"])
295
296    def test_punct_hyphens(self):
297        # Oh bother, SF #965425 found another problem with hyphens --
298        # hyphenated words in single quotes weren't handled correctly.
299        # In fact, the bug is that *any* punctuation around a hyphenated
300        # word was handled incorrectly, except for a leading "--", which
301        # was special-cased for Optik and Docutils.  So test a variety
302        # of styles of punctuation around a hyphenated word.
303        # (Actually this is based on an Optik bug report, #813077).
304        self.check_split("the 'wibble-wobble' widget",
305                         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget'])
306        self.check_split('the "wibble-wobble" widget',
307                         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget'])
308        self.check_split("the (wibble-wobble) widget",
309                         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget'])
310        self.check_split("the ['wibble-wobble'] widget",
311                         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget'])
312
313    def test_funky_parens (self):
314        # Second part of SF bug #596434: long option strings inside
315        # parentheses.
316        self.check_split("foo (--option) bar",
317                         ["foo", " ", "(--option)", " ", "bar"])
318
319        # Related stuff -- make sure parens work in simpler contexts.
320        self.check_split("foo (bar) baz",
321                         ["foo", " ", "(bar)", " ", "baz"])
322        self.check_split("blah (ding dong), wubba",
323                         ["blah", " ", "(ding", " ", "dong),",
324                          " ", "wubba"])
325
326    def test_initial_whitespace(self):
327        # SF bug #622849 reported inconsistent handling of leading
328        # whitespace; let's test that a bit, shall we?
329        text = " This is a sentence with leading whitespace."
330        self.check_wrap(text, 50,
331                        [" This is a sentence with leading whitespace."])
332        self.check_wrap(text, 30,
333                        [" This is a sentence with", "leading whitespace."])
334
335    def test_no_drop_whitespace(self):
336        # SF patch #1581073
337        text = " This is a    sentence with     much whitespace."
338        self.check_wrap(text, 10,
339                        [" This is a", "    ", "sentence ",
340                         "with     ", "much white", "space."],
341                        drop_whitespace=False)
342
343    if test_support.have_unicode:
344        def test_unicode(self):
345            # *Very* simple test of wrapping Unicode strings.  I'm sure
346            # there's more to it than this, but let's at least make
347            # sure textwrap doesn't crash on Unicode input!
348            text = u"Hello there, how are you today?"
349            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
350            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
351            olines = self.wrapper.wrap(text)
352            assert isinstance(olines, list) and isinstance(olines[0], unicode)
353            otext = self.wrapper.fill(text)
354            assert isinstance(otext, unicode)
355
356        def test_no_split_at_umlaut(self):
357            text = u"Die Empf\xe4nger-Auswahl"
358            self.check_wrap(text, 13, [u"Die", u"Empf\xe4nger-", u"Auswahl"])
359
360        def test_umlaut_followed_by_dash(self):
361            text = u"aa \xe4\xe4-\xe4\xe4"
362            self.check_wrap(text, 7, [u"aa \xe4\xe4-", u"\xe4\xe4"])
363
364    def test_split(self):
365        # Ensure that the standard _split() method works as advertised
366        # in the comments
367
368        text = "Hello there -- you goof-ball, use the -b option!"
369
370        result = self.wrapper._split(text)
371        self.check(result,
372             ["Hello", " ", "there", " ", "--", " ", "you", " ", "goof-",
373              "ball,", " ", "use", " ", "the", " ", "-b", " ",  "option!"])
374
375    def test_break_on_hyphens(self):
376        # Ensure that the break_on_hyphens attributes work
377        text = "yaba daba-doo"
378        self.check_wrap(text, 10, ["yaba daba-", "doo"],
379                        break_on_hyphens=True)
380        self.check_wrap(text, 10, ["yaba", "daba-doo"],
381                        break_on_hyphens=False)
382
383    def test_bad_width(self):
384        # Ensure that width <= 0 is caught.
385        text = "Whatever, it doesn't matter."
386        self.assertRaises(ValueError, wrap, text, 0)
387        self.assertRaises(ValueError, wrap, text, -1)
388
389
class LongWordTestCase(BaseTestCase):
    '''Tests for text containing a word longer than the wrap width.'''

    def setUp(self):
        # Sample text whose quoted word is longer than the widths the
        # tests below wrap to.
        self.text = '''\
Did you say "supercalifragilisticexpialidocious?"
How *do* you spell that odd word, anyways?
'''
        self.wrapper = TextWrapper()

    def test_break_long(self):
        # Wrap text with long words and lots of punctuation

        at_width_30 = ['Did you say "supercalifragilis',
                       'ticexpialidocious?" How *do*',
                       'you spell that odd word,',
                       'anyways?']
        self.check_wrap(self.text, 30, at_width_30)

        at_width_50 = ['Did you say "supercalifragilisticexpialidocious?"',
                       'How *do* you spell that odd word, anyways?']
        self.check_wrap(self.text, 50, at_width_50)

        # SF bug 797650.  Prevent an infinite loop by making sure that at
        # least one character gets split off on every pass.
        dashes_then_word = '-' * 10 + 'hello'
        one_char_per_line = ['----------',
                             '               h',
                             '               e',
                             '               l',
                             '               l',
                             '               o']
        self.check_wrap(dashes_then_word, 10, one_char_per_line,
                        subsequent_indent=' ' * 15)

        # bug 1146.  Prevent a long word to be wrongly wrapped when the
        # preceding word is exactly one character shorter than the width
        at_width_12 = ['Did you say ',
                       '"supercalifr',
                       'agilisticexp',
                       'ialidocious?',
                       '" How *do*',
                       'you spell',
                       'that odd',
                       'word,',
                       'anyways?']
        self.check_wrap(self.text, 12, at_width_12)

    def test_nobreak_long(self):
        # Test with break_long_words disabled
        unbroken = ['Did you say',
                    '"supercalifragilisticexpialidocious?"',
                    'How *do* you spell that odd',
                    'word, anyways?']

        # First through the TextWrapper instance created in setUp() ...
        self.wrapper.break_long_words = 0
        self.wrapper.width = 30
        from_method = self.wrapper.wrap(self.text)
        self.check(from_method, unbroken)

        # ... then the same thing with kwargs passed to the standalone
        # wrap() function.
        from_function = wrap(self.text, width=30, break_long_words=0)
        self.check(from_function, unbroken)
449
450
class IndentTestCases(BaseTestCase):
    '''Tests for fill() and for the initial_indent and
    subsequent_indent options of wrap() and fill().'''

    def setUp(self):
        # Runs before each test method: the two-line paragraph that
        # every test in this class re-wraps to 40 columns.
        self.text = ("This paragraph will be filled, first without any "
                     "indentation,\n"
                     "and then with some (including a hanging indent).")

    def test_fill(self):
        # Test the fill() method
        filled = '\n'.join(["This paragraph will be filled, first",
                            "without any indentation, and then with",
                            "some (including a hanging indent)."])
        self.check(fill(self.text, 40), filled)

    def test_initial_indent(self):
        # Test initial_indent parameter
        lines = ["     This paragraph will be filled,",
                 "first without any indentation, and then",
                 "with some (including a hanging indent)."]

        # wrap() returns the list of lines ...
        self.check(wrap(self.text, 40, initial_indent="     "), lines)

        # ... and fill() returns those same lines joined by newlines.
        self.check(fill(self.text, 40, initial_indent="     "),
                   "\n".join(lines))

    def test_subsequent_indent(self):
        # Test subsequent_indent parameter
        hanging = '\n'.join(["  * This paragraph will be filled, first",
                             "    without any indentation, and then",
                             "    with some (including a hanging",
                             "    indent)."])
        bulleted = fill(self.text, 40,
                        initial_indent="  * ", subsequent_indent="    ")
        self.check(bulleted, hanging)
498
499
# Despite the similar names, DedentTestCase is *not* the inverse
# of IndentTestCases!
class DedentTestCase(unittest.TestCase):
    '''Tests for textwrap.dedent().'''

    def assertUnchanged(self, text):
        """assert that dedent() has no effect on 'text'"""
        self.assertEqual(text, dedent(text))

    def test_dedent_nomargin(self):
        # No lines indented.
        text = "Hello there.\nHow are you?\nOh good, I'm glad."
        self.assertUnchanged(text)

        # Similar, with a blank line.
        text = "Hello there.\n\nBoo!"
        self.assertUnchanged(text)

        # Some lines indented, but overall margin is still zero.
        text = "Hello there.\n  This is indented."
        self.assertUnchanged(text)

        # Again, add a blank line.
        text = "Hello there.\n\n  Boo!\n"
        self.assertUnchanged(text)

    def test_dedent_even(self):
        # All lines indented by two spaces.
        text = "  Hello there.\n  How are ya?\n  Oh good."
        expect = "Hello there.\nHow are ya?\nOh good."
        self.assertEqual(expect, dedent(text))

        # Same, with blank lines.
        text = "  Hello there.\n\n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

        # Now indent one of the blank lines.
        text = "  Hello there.\n  \n  How are ya?\n  Oh good.\n"
        expect = "Hello there.\n\nHow are ya?\nOh good.\n"
        self.assertEqual(expect, dedent(text))

    def test_dedent_uneven(self):
        # Lines indented unevenly.
        text = '''\
        def foo():
            while 1:
                return foo
        '''
        expect = '''\
def foo():
    while 1:
        return foo
'''
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a blank line.
        text = "  Foo\n    Bar\n\n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

        # Uneven indentation with a whitespace-only line.
        text = "  Foo\n    Bar\n \n   Baz\n"
        expect = "Foo\n  Bar\n\n Baz\n"
        self.assertEqual(expect, dedent(text))

    # dedent() should not mangle internal tabs
    def test_dedent_preserve_internal_tabs(self):
        text = "  hello\tthere\n  how are\tyou?"
        expect = "hello\tthere\nhow are\tyou?"
        self.assertEqual(expect, dedent(text))

        # make sure that it preserves tabs when it's not making any
        # changes at all
        self.assertEqual(expect, dedent(expect))

    # dedent() should not mangle tabs in the margin (i.e.
    # tabs and spaces both count as margin, but are *not*
    # considered equivalent)
    def test_dedent_preserve_margin_tabs(self):
        text = "  hello there\n\thow are you?"
        self.assertUnchanged(text)

        # same effect even if we have 8 spaces
        text = "        hello there\n\thow are you?"
        self.assertUnchanged(text)

        # dedent() only removes whitespace that can be uniformly removed!
        text = "\thello there\n\thow are you?"
        expect = "hello there\nhow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \thow are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \t  hello there\n  \t  how are you?"
        self.assertEqual(expect, dedent(text))

        text = "  \thello there\n  \t  how are you?"
        expect = "hello there\n  how are you?"
        self.assertEqual(expect, dedent(text))
600
601
def test_main():
    '''Pass every TestCase class defined in this module to
    test_support.run_unittest().'''
    cases = (
        WrapTestCase,
        LongWordTestCase,
        IndentTestCases,
        DedentTestCase,
    )
    test_support.run_unittest(*cases)


if __name__ == '__main__':
    test_main()