
/tests/test-pyshlex.py

https://bitbucket.org/pmezard/pysh
Possible License(s): GPL-2.0
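
Unit tests for the pysh lexer module (pysh.pyshlex): token-level lexing with PLYLexer, word-level lexing with WordLexer, and word-tree construction and normalization.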
# test-pyshlex.py - lexer module unit tests
#
# Copyright 2007 Patrick Mezard
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.
import unittest
from pysh.pyshlex import *
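# The star import is relied on below for (at least) PLYLexer, WordLexer,
# NeedMore, ShellError, TK_TOKEN, TK_NEWLINE, make_wordtree,
# normalize_wordtree and wordtree_as_string.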

class TestLexer(unittest.TestCase):
    def test_lexer(self):
        def get_all_tokens(s):
            """Like get_tokens(), but exhausts the input sequence and returns
            all tokens in a single list.
            """
            lexer = PLYLexer()
            tokens = []
            while s:
                s = lexer.add(s, True)
                while 1:
                    token = lexer.token()
                    if token is None:
                        break
                    tokens.append(token)
            tokens = [(t.value, t.type) for t in tokens]
            return tokens, s
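
        # The feeding protocol exercised here and in test_backslash() below:
        # lexer.add(data, eof) consumes what it can and returns the unparsed
        # tail, raising NeedMore when it cannot finish a token without more
        # input; lexer.token() returns None once the queue is drained.
        # A minimal sketch of chunked feeding (with a hypothetical 'chunks'):
        #
        #   lexer = PLYLexer()
        #   for i, chunk in enumerate(chunks):
        #       try:
        #           lexer.add(chunk, i == len(chunks) - 1)
        #       except NeedMore:
        #           pass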

        tests = [
            # Quoting characters must be left in the tokens
            ('\\a\n', ([('\\a', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ('c"a"b\n', ([('c"a"b', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ("c'a'b\n", ([("c'a'b", TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            # Quoted newlines must disappear
            ('a\\\nb\n', ([('ab', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            # Simple echo with file redirection
            ('echo s s2 >test.txt\n', ([('echo', TK_TOKEN), ('s', TK_TOKEN), ('s2', TK_TOKEN), ('>', 'GREATER'), ('test.txt', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            # Pipes and quotes and redirections
            ('hg status 2>&1 |sed -e "s:/.*\(/t/.*\):...\1:" ', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN'), ('|', 'PIPE'), ('sed', 'TOKEN'), ('-e', 'TOKEN'), ('"s:/.*\\(/t/.*\\):...\x01:"', 'TOKEN')], '')),
            # Backquotes
            ('abspath=`pwd`/badext.py', ([('abspath=`pwd`/badext.py', 'ASSIGNMENT_WORD')], '')),
            # Expansion forms
            ('echo $(FOO)${BAR}$BAZ', ([('echo', 'TOKEN'), ('$(FOO)${BAR}$BAZ', 'TOKEN')], '')),
            # May trigger a NeedMore if the eof flag is not passed correctly to
            # the name matcher.
            ('echo $BAZ', ([('echo', 'TOKEN'), ('$BAZ', 'TOKEN')], '')),
            # Quoted stuff
            ('echo "foo$(BAR)\'baz\'\\"bletch"', ([('echo', 'TOKEN'), ('"foo$(BAR)\'baz\'\\"bletch"', 'TOKEN')], '')),
            # IO_NUMBERs are delimited by '<' or '>'
            ('echo 123 > a', ([('echo', 'TOKEN'), ('123', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            ('hg status 2>&1', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN')], '')),
            # Here-documents
            ('cat <<eof\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Double document
            ('cat <<eof; cat <<- eof2\nfoo\neof\n\t\tbar\neof2\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), (';', 'COMMA'), ('cat', 'TOKEN'), ('<<-', 'DLESSDASH'), ('eof2', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Opening delimiter with quoted characters
            ('cat <<e\\o"f"\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('e\\o"f"', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Test newline in here-document operator following tokens
            ('cat <<eof; echo "fuu\nba\nz"\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), (';', 'COMMA'), ('echo', 'TOKEN'), ('"fuu\nba\nz"', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Test comment following here-document opening delimiter
            ('cat <<eof #a comment\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Test here-document terminated by eof (whether or not eof is the last line)
            ('cat <<eof\nbar', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            ('cat <<eof\nbar\neof', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            # Test 'In' detection in 'For' loop
            ('ls in', ([('ls', 'TOKEN'), ('in', 'TOKEN')], '')),
            # Newline delimiting problem because of trailing spaces
            ('\ncd a \necho a > a ', ([('\n', 'NEWLINE'), ('cd', 'TOKEN'), ('a', 'TOKEN'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('a', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            # Function definition
            ('cleanpath()\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
            ('cleanpath ( )\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
        ]
        for s, expected in tests:
            tokens = get_all_tokens(s)
            self.assertEqual(tokens, expected)

        tests = [
            # Missing here-document opening delimiter
            'cat <<\n',
            'cat <<',
        ]
        for t in tests:
            try:
                get_all_tokens(t)
            except ShellError:
                pass
            except BaseException, e:
                self.fail("'%s' raised '%s'" % (repr(t), repr(e)))
            else:
                self.fail("'%s' did not raise" % repr(t))

    def test_backslash(self):
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo\\'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo\\bar', lexer.token().value)

    def test_backquote(self):
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo `'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r`', True)
        self.assertEqual('foo', lexer.token().value)
        self.assertEqual('`bar`', lexer.token().value)

    def test_dollar(self):
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo$'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo$bar', lexer.token().value)

    def test_wordlexer(self):
        def lex_with_slices(s, size=None):
            """Feed s to a WordLexer in slices of at most 'size' characters
            (all at once when size is None) and return the word trees.
            """
            lexer = WordLexer()
            result = []
            while 1:
                if size is None or size >= len(s):
                    part = s
                else:
                    part = s[:size]
                eof = len(s) == len(part)
                try:
                    wtree, remaining = lexer.add(part, eof)
                    result.append(wtree)
                except NeedMore:
                    remaining = ''
                s = s[len(part) - len(remaining):]
                if not s:
                    break
            return result
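
        # Expected values are word trees: each quoted or expanded span becomes
        # a nested [opening, contents..., closing] list, e.g. "'foo'" lexes to
        # ["'", 'foo', "'"] and "$foo" to ['$', 'foo', ''] (name expansion has
        # no closing delimiter).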
        tests = [
            ("'foo'", [["'", 'foo', "'"]]),
            ("$(foo)", [['$(', 'foo', ')']]),
            ("$foo", [['$', 'foo', '']]),
            ("$(foo)$bar", [['$(', 'foo', ')'], ['$', 'bar', '']]),
            ("$( \(a\))", [['$(', ' ', ['\\', '(', ''], 'a', ['\\', ')', ''], '', ')']]),
            ("$( '(a)')", [['$(', ' ', ["'", '(a)', "'"], '', ')']]),
            ("$( \"$(bar)\"plus$foo)", [['$(', ' ', ['"', '', ['$(', 'bar', ')'], '', '"'], 'plus', ['$', 'foo', ''], '', ')']]),
            ("`foo`", [['`', 'foo', '`']]),
            ("${foo}", [['${', 'foo', '}']]),
            ('"a\\$\\a"', [['"', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '"']]),
            ('`a\\$\\a`', [['`', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '`']]),
            ("\\'", [['\\', "'", '']]),
            # backslash is backslash when single-quoted
            ("'\\foo'", [["'", '\\foo', "'"]]),
            ('"foo\'bar\'baz"', [['"', 'foo\'bar\'baz', '"']]),
            ('"foo\'b\\\\q\'baz"', [['"', "foo'b", ['\\', '\\', ''], "q'baz", '"']]),
            # backslash still escapes double quotes within double quotes
            ('"foo\\"$i\\"bar"', [['"', 'foo', ['\\', '"', ''], '', ['$', 'i', ''], '', ['\\', '"', ''], 'bar', '"']]),
            # backslash should not escape most characters within double quotes
            ('"foo\\nbar"', [['"', 'foo', ['', '\\n', ''], 'bar', '"']]),
        ]

        def test_with_slices(test, expected, stride):
            try:
                res = lex_with_slices(test, stride)
                self.assertEqual(res, expected)
                self.assertEqual(wordtree_as_string(res), test)
            except Exception, e:
                import traceback
                traceback.print_exc()
                self.fail('%s for %s with stride=%s' % (str(e), test, str(stride)))

        for test, expected in tests:
            test_with_slices(test, expected, None)
            test_with_slices(test, expected, 1)

    def test_makewordtree(self):
        wtree = make_wordtree('foo"bar$(baz)" ble\\tch')
        self.assertEqual(wtree, ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' ble', ['\\', 't', ''], 'ch', ''])
        wtree = make_wordtree('$.')
        self.assertEqual(wtree, ['', '$', '.', ''])
        # quotes are not special when parsing here-documents
        wtree = make_wordtree('foo"b\'a\'r$(b"a"z)"\\\n b"le\\tch', True)
        self.assertEqual(wtree, ['', 'foo"b\'a\'r', ['$(', 'b', ['"', 'a', '"'], 'z', ')'], '"', '', ' b"le', '\\t', 'ch', ''])
        wtree = make_wordtree('""')
        self.assertEqual(wtree, ['', ['"', '', '"'], ''])

    def test_normalizewordtree(self):
        tests = [
            (['', 'one', ['', 'two', ['"', 'three', '"'], ''], 'four', ['', 'five', ''], ''],
             ['', 'one', 'two', ['"', 'three', '"'], 'four', 'five', '']),
            (['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '', '"'], ' bletch', ''],
             ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' bletch', '']),
            (['', ['"', '', '"'], ''],
             ['', ['"', '', '"'], '']),
        ]
        for test, expected in tests:
            self.assertEqual(normalize_wordtree(test), expected)


if __name__ == '__main__':
    unittest.main()