/tests/test-pyshlex.py
Python | 206 lines | 187 code | 8 blank | 11 comment | 10 complexity | 9a7a0e887d07bb79dad6258593dfb8a8 MD5 | raw file
Possible License(s): GPL-2.0
- # test-pyshlex.py - lexer module unit tests
- #
- # Copyright 2007 Patrick Mezard
- #
- # This software may be used and distributed according to the terms
- # of the GNU General Public License, incorporated herein by reference.
- import unittest
- from pysh.pyshlex import *
class TestLexer(unittest.TestCase):
    """Unit tests for the pysh.pyshlex lexer layer.

    Exercises ``PLYLexer`` (shell tokenization: quoting, redirections,
    here-documents, expansions) and ``WordLexer``/``make_wordtree``/
    ``normalize_wordtree`` (word-tree construction).

    NOTE(review): the original file used the Python-2-only
    ``except X, e`` syntax; it is rewritten below as ``except X as e``,
    which is valid on Python 2.6+ and Python 3. No test data was changed.
    """

    def test_lexer(self):
        """Tokenize complete shell snippets and compare (value, type) pairs."""
        def get_all_tokens(s):
            """Like get_tokens() but exhaust the input sequence and return all tokens
            in a single list.
            """
            lexer = PLYLexer()
            tokens = []
            while s:
                # Feed the remaining input with eof=True; the lexer returns
                # whatever it could not consume yet.
                s = lexer.add(s, True)
                while True:
                    token = lexer.token()
                    if token is None:
                        break
                    tokens.append(token)

            tokens = [(t.value, t.type) for t in tokens]
            return tokens, s

        # Each entry: (input string, (expected (value, type) list, expected remainder)).
        tests = [
            #Quoting characters must be left in the tokens
            ('\\a\n', ([('\\a', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ('c"a"b\n', ([('c"a"b', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ("c'a'b\n", ([("c'a'b", TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Quoted newlines must disappear
            ('a\\\nb\n', ([('ab', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Simple echo with file redirection
            ('echo s s2 >test.txt\n', ([('echo', TK_TOKEN), ('s', TK_TOKEN), ('s2', TK_TOKEN), ('>', 'GREATER'), ('test.txt', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Pipes and quotes and redirections
            ('hg status 2>&1 |sed -e "s:/.*\(/t/.*\):...\1:" ', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN'), ('|', 'PIPE'), ('sed', 'TOKEN'), ('-e', 'TOKEN'), ('"s:/.*\\(/t/.*\\):...\x01:"', 'TOKEN')], '')),
            #Backquotes
            ('abspath=`pwd`/badext.py', ([('abspath=`pwd`/badext.py', 'ASSIGNMENT_WORD')], '')),
            #Expansion forms
            ('echo $(FOO)${BAR}$BAZ', ([('echo', 'TOKEN'), ('$(FOO)${BAR}$BAZ', 'TOKEN')], '')),
            #May trigger a NeedMore if the eof flag is not passed correctly to
            #the name matcher.
            ('echo $BAZ', ([('echo', 'TOKEN'), ('$BAZ', 'TOKEN')], '')),
            #Quoted stuff
            ('echo "foo$(BAR)\'baz\'\\"bletch"', ([('echo', 'TOKEN'), ('"foo$(BAR)\'baz\'\\"bletch"', 'TOKEN')], '')),
            #IO_NUMBER are delimited by '<' or '>'
            ('echo 123 > a', ([('echo', 'TOKEN'), ('123', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            ('hg status 2>&1', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN')], '')),
            #Here-documents
            ('cat <<eof\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Double document
            ('cat <<eof; cat <<- eof2\nfoo\neof\n\t\tbar\neof2\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), (';', 'COMMA'), ('cat', 'TOKEN'), ('<<-', 'DLESSDASH'), ('eof2', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Opening delimiter with quoted characters
            ('cat <<e\\o"f"\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('e\\o"f"', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test newline in here-document operator following tokens
            ('cat <<eof; echo "fuu\nba\nz"\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), (';', 'COMMA'), ('echo', 'TOKEN'), ('"fuu\nba\nz"', 'TOKEN'), ('\n','NEWLINE')], '')),
            #Test comment following here-document opening delimiter
            ('cat <<eof #a comment\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test here document terminated by eof (eof being or not the last line)
            ('cat <<eof\nbar', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            ('cat <<eof\nbar\neof', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test 'In' detection in 'For' loop
            ('ls in', ([('ls', 'TOKEN'), ('in', 'TOKEN')], '')),
            #Newline delimiting problem because of trailing spaces
            ('\ncd a \necho a > a ', ([('\n', 'NEWLINE'), ('cd', 'TOKEN'), ('a', 'TOKEN'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('a', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            #Function definition
            ('cleanpath()\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
            ('cleanpath ( )\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
        ]
        for s, expected in tests:
            tokens = get_all_tokens(s)
            self.assertEqual(tokens, expected)

        # Inputs that must raise a ShellError (and nothing else).
        tests = [
            #Missing here-document opening delimiter
            'cat <<\n',
            'cat <<',
        ]

        for t in tests:
            try:
                get_all_tokens(t)
            except ShellError:
                pass
            except BaseException as e:
                # FIX(review): was Python-2-only "except BaseException, e"
                self.fail("'%s' raised '%s'" % (repr(t),repr(e)))
            else:
                self.fail("'%s' did not raise" % repr(t))


    def test_backslash(self):
        """A trailing backslash must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo\\'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo\\bar', lexer.token().value)

    def test_backquote(self):
        """An unterminated backquote must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo `'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r`', True)
        self.assertEqual('foo', lexer.token().value)
        self.assertEqual('`bar`', lexer.token().value)

    def test_dollar(self):
        """A trailing '$' must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo$'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo$bar', lexer.token().value)

    def test_wordlexer(self):
        """WordLexer must build identical word trees regardless of how the
        input is sliced (whole string vs. one character at a time)."""
        def lex_with_slices(s, size=None):
            # Feed s to a WordLexer in chunks of at most `size` characters
            # (or all at once when size is None) and collect the word trees.
            lexer = WordLexer()

            result = []
            while True:
                if size is None or size >= len(s):
                    part = s
                else:
                    part = s[:size]
                eof = len(s) == len(part)

                try:
                    wtree, remaining = lexer.add(part, eof)
                    result.append(wtree)
                except NeedMore:
                    remaining = ''

                s = s[len(part)-len(remaining):]
                if not s:
                    break

            return result

        # Each entry: (input word, expected list of word trees).
        tests = [
            ("'foo'", [["'", 'foo', "'"]]),
            ("$(foo)", [['$(', 'foo', ')']]),
            ("$foo", [['$', 'foo', '']]),
            ("$(foo)$bar", [['$(', 'foo', ')'], ['$', 'bar', '']]),
            ("$( \(a\))", [['$(', ' ', ['\\', '(', ''], 'a', ['\\', ')', ''], '', ')']]),
            ("$( '(a)')", [['$(', ' ', ["'", '(a)', "'"], '', ')']]),
            ("$( \"$(bar)\"plus$foo)", [['$(', ' ', ['"', '', ['$(', 'bar', ')'], '', '"'], 'plus', ['$', 'foo', ''], '', ')']]),
            ("`foo`", [['`', 'foo', '`']]),
            ("${foo}", [['${', 'foo', '}']]),
            ('"a\\$\\a"', [['"', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '"']]),
            ('`a\\$\\a`', [['`', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '`']]),
            ("\\'", [['\\', "'", '']]),
            # backslash is backslash when single-quoted
            ("'\\foo'", [["'", '\\foo', "'"]]),
            ('"foo\'bar\'baz"', [['"', 'foo\'bar\'baz', '"']]),
            ('"foo\'b\\\\q\'baz"', [['"', "foo'b", ['\\', '\\', ''], "q'baz", '"']]),
            # backslash still escapes double quotes within double quotes
            ('"foo\\"$i\\"bar"', [['"', 'foo', ['\\', '"', ''], '', ['$', 'i', ''], '', ['\\', '"', ''], 'bar', '"']]),
            # backslash should not escape most stuff when within double quotes
            ('"foo\\nbar"', [['"', 'foo', ['', '\\n', ''], 'bar', '"']]),
        ]

        def test_with_slices(test, expected, stride):
            # Run one case and also check round-tripping through
            # wordtree_as_string reproduces the input.
            try:
                res = lex_with_slices(test, stride)
                self.assertEqual(res, expected)
                self.assertEqual(wordtree_as_string(res), test)
            except Exception as e:
                # FIX(review): was Python-2-only "except Exception, e"
                import traceback
                traceback.print_exc()
                self.fail('%s for %s with stride=%s' % (str(e), test, str(stride)))

        for test, expected in tests:
            test_with_slices(test, expected, None)
            test_with_slices(test, expected, 1)


    def test_makewordtree(self):
        """make_wordtree must split a word into nested quoting/expansion nodes."""
        wtree = make_wordtree('foo"bar$(baz)" ble\\tch')
        self.assertEqual(wtree, ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' ble', ['\\', 't', ''], 'ch', ''])

        wtree = make_wordtree('$.')
        self.assertEqual(wtree, ['', '$', '.', ''])

        #quotes are not special when parsing here documents
        wtree = make_wordtree('foo"b\'a\'r$(b"a"z)"\\\n b"le\\tch', True)
        self.assertEqual(wtree, ['', 'foo"b\'a\'r', ['$(', 'b', ['"', 'a', '"'], 'z', ')'], '"', '', ' b"le', '\\t', 'ch', ''])

        wtree = make_wordtree('""')
        self.assertEqual(wtree, ['', ['"', '', '"'], ''])

    def test_normalizewordtree(self):
        """normalize_wordtree must flatten redundant nesting and drop empty parts."""
        tests = [
            (['', 'one', ['', 'two', ['"', 'three', '"'], ''], 'four', ['', 'five', ''], ''],
             ['', 'one', 'two', ['"', 'three', '"'], 'four', 'five', '']),
            (['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '', '"'], ' bletch', ''],
             ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' bletch', '']),
            (['', ['"', '', '"'], ''],
             ['', ['"', '', '"'], ''])
        ]

        for test, expected in tests:
            self.assertEqual(normalize_wordtree(test), expected)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()