/tests/test-pyshlex.py
Python | 206 lines | 187 code | 8 blank | 11 comment | 10 complexity | 9a7a0e887d07bb79dad6258593dfb8a8 MD5 | raw file
Possible License(s): GPL-2.0
- # test-pyshlex.py - lexer module unit tests
- #
- # Copyright 2007 Patrick Mezard
- #
- # This software may be used and distributed according to the terms
- # of the GNU General Public License, incorporated herein by reference.
- import unittest
- from pysh.pyshlex import *
class TestLexer(unittest.TestCase):
    """Unit tests for the pysh.pyshlex lexer layer.

    Exercises ``PLYLexer`` (shell tokenization: quoting, redirections,
    here-documents, expansions) and ``WordLexer``/``make_wordtree``/
    ``normalize_wordtree`` (word-tree construction).

    NOTE(review): the original file used the Python-2-only
    ``except X, e`` syntax; it is rewritten below as ``except X as e``,
    which is valid on Python 2.6+ and Python 3. No test data was changed.
    """

    def test_lexer(self):
        """Tokenize complete shell snippets and compare (value, type) pairs."""
        def get_all_tokens(s):
            """Like get_tokens() but exhaust the input sequence and return all tokens
            in a single list.
            """
            lexer = PLYLexer()
            tokens = []
            while s:
                # Feed the remaining input with eof=True; the lexer returns
                # whatever it could not consume yet.
                s = lexer.add(s, True)
                while True:
                    token = lexer.token()
                    if token is None:
                        break
                    tokens.append(token)

            tokens = [(t.value, t.type) for t in tokens]
            return tokens, s

        # Each entry: (input string, (expected (value, type) list, expected remainder)).
        tests = [
            #Quoting characters must be left in the tokens
            ('\\a\n', ([('\\a', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ('c"a"b\n', ([('c"a"b', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            ("c'a'b\n", ([("c'a'b", TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Quoted newlines must disappear
            ('a\\\nb\n', ([('ab', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Simple echo with file redirection
            ('echo s s2 >test.txt\n', ([('echo', TK_TOKEN), ('s', TK_TOKEN), ('s2', TK_TOKEN), ('>', 'GREATER'), ('test.txt', TK_TOKEN), ('\n', TK_NEWLINE)], '')),
            #Pipes and quotes and redirections
            ('hg status 2>&1 |sed -e "s:/.*\(/t/.*\):...\1:" ', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN'), ('|', 'PIPE'), ('sed', 'TOKEN'), ('-e', 'TOKEN'), ('"s:/.*\\(/t/.*\\):...\x01:"', 'TOKEN')], '')),
            #Backquotes
            ('abspath=`pwd`/badext.py', ([('abspath=`pwd`/badext.py', 'ASSIGNMENT_WORD')], '')),
            #Expansion forms
            ('echo $(FOO)${BAR}$BAZ', ([('echo', 'TOKEN'), ('$(FOO)${BAR}$BAZ', 'TOKEN')], '')),
            #May trigger a NeedMore if the eof flag is not passed correctly to
            #the name matcher.
            ('echo $BAZ', ([('echo', 'TOKEN'), ('$BAZ', 'TOKEN')], '')),
            #Quoted stuff
            ('echo "foo$(BAR)\'baz\'\\"bletch"', ([('echo', 'TOKEN'), ('"foo$(BAR)\'baz\'\\"bletch"', 'TOKEN')], '')),
            #IO_NUMBER are delimited by '<' or '>'
            ('echo 123 > a', ([('echo', 'TOKEN'), ('123', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            ('hg status 2>&1', ([('hg', 'TOKEN'), ('status', 'TOKEN'), ('2', 'IO_NUMBER'), ('>&', 'GREATAND'), ('1', 'TOKEN')], '')),
            #Here-documents
            ('cat <<eof\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Double document
            ('cat <<eof; cat <<- eof2\nfoo\neof\n\t\tbar\neof2\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('foo\n', 'TOKEN'), (';', 'COMMA'), ('cat', 'TOKEN'), ('<<-', 'DLESSDASH'), ('eof2', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Opening delimiter with quoted characters
            ('cat <<e\\o"f"\nfoo\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('e\\o"f"', 'HERENAME'), ('foo\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test newline in here-document operator following tokens
            ('cat <<eof; echo "fuu\nba\nz"\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), (';', 'COMMA'), ('echo', 'TOKEN'), ('"fuu\nba\nz"', 'TOKEN'), ('\n','NEWLINE')], '')),
            #Test comment following here-document opening delimiter
            ('cat <<eof #a comment\nbar\neof\n', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test here document terminated by eof (eof being or not the last line)
            ('cat <<eof\nbar', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            ('cat <<eof\nbar\neof', ([('cat', 'TOKEN'), ('<<', 'DLESS'), ('eof', 'HERENAME'), ('bar\n', 'TOKEN'), ('\n', 'NEWLINE')], '')),
            #Test 'In' detection in 'For' loop
            ('ls in', ([('ls', 'TOKEN'), ('in', 'TOKEN')], '')),
            #Newline delimiting problem because of trailing spaces
            ('\ncd a \necho a > a ', ([('\n', 'NEWLINE'), ('cd', 'TOKEN'), ('a', 'TOKEN'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('a', 'TOKEN'), ('>', 'GREATER'), ('a', 'TOKEN')], '')),
            #Function definition
            ('cleanpath()\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
            ('cleanpath ( )\n{\necho function definition\n}', ([('cleanpath', 'TOKEN'), ('(', 'LPARENS'), (')', 'RPARENS'), ('\n', 'NEWLINE'), ('{', 'Lbrace'), ('\n', 'NEWLINE'), ('echo', 'TOKEN'), ('function', 'TOKEN'), ('definition', 'TOKEN'), ('\n', 'NEWLINE'), ('}', 'Rbrace')], '')),
        ]
        for s, expected in tests:
            tokens = get_all_tokens(s)
            self.assertEqual(tokens, expected)

        # Inputs that must raise a ShellError (and nothing else).
        tests = [
            #Missing here-document opening delimiter
            'cat <<\n',
            'cat <<',
        ]

        for t in tests:
            try:
                get_all_tokens(t)
            except ShellError:
                pass
            except BaseException as e:
                # FIX(review): was Python-2-only "except BaseException, e"
                self.fail("'%s' raised '%s'" % (repr(t),repr(e)))
            else:
                self.fail("'%s' did not raise" % repr(t))


    def test_backslash(self):
        """A trailing backslash must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo\\'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo\\bar', lexer.token().value)

    def test_backquote(self):
        """An unterminated backquote must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo `'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r`', True)
        self.assertEqual('foo', lexer.token().value)
        self.assertEqual('`bar`', lexer.token().value)

    def test_dollar(self):
        """A trailing '$' must make the lexer ask for more input."""
        lexer = PLYLexer()
        self.assertRaises(NeedMore, lambda: lexer.add('foo$'))
        self.assertRaises(NeedMore, lambda: lexer.add('ba'))
        lexer.add('r', True)
        self.assertEqual('foo$bar', lexer.token().value)

    def test_wordlexer(self):
        """WordLexer must build identical word trees regardless of how the
        input is sliced (whole string vs. one character at a time)."""
        def lex_with_slices(s, size=None):
            # Feed s to a WordLexer in chunks of at most `size` characters
            # (or all at once when size is None) and collect the word trees.
            lexer = WordLexer()

            result = []
            while True:
                if size is None or size >= len(s):
                    part = s
                else:
                    part = s[:size]
                eof = len(s) == len(part)

                try:
                    wtree, remaining = lexer.add(part, eof)
                    result.append(wtree)
                except NeedMore:
                    remaining = ''

                s = s[len(part)-len(remaining):]
                if not s:
                    break

            return result

        # Each entry: (input word, expected list of word trees).
        tests = [
            ("'foo'", [["'", 'foo', "'"]]),
            ("$(foo)", [['$(', 'foo', ')']]),
            ("$foo", [['$', 'foo', '']]),
            ("$(foo)$bar", [['$(', 'foo', ')'], ['$', 'bar', '']]),
            ("$( \(a\))", [['$(', ' ', ['\\', '(', ''], 'a', ['\\', ')', ''], '', ')']]),
            ("$( '(a)')", [['$(', ' ', ["'", '(a)', "'"], '', ')']]),
            ("$( \"$(bar)\"plus$foo)", [['$(', ' ', ['"', '', ['$(', 'bar', ')'], '', '"'], 'plus', ['$', 'foo', ''], '', ')']]),
            ("`foo`", [['`', 'foo', '`']]),
            ("${foo}", [['${', 'foo', '}']]),
            ('"a\\$\\a"', [['"', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '"']]),
            ('`a\\$\\a`', [['`', 'a', ['\\', '$', ''], '', ['', '\\a', ''], '', '`']]),
            ("\\'", [['\\', "'", '']]),
            # backslash is backslash when single-quoted
            ("'\\foo'", [["'", '\\foo', "'"]]),
            ('"foo\'bar\'baz"', [['"', 'foo\'bar\'baz', '"']]),
            ('"foo\'b\\\\q\'baz"', [['"', "foo'b", ['\\', '\\', ''], "q'baz", '"']]),
            # backslash still escapes double quotes within double quotes
            ('"foo\\"$i\\"bar"', [['"', 'foo', ['\\', '"', ''], '', ['$', 'i', ''], '', ['\\', '"', ''], 'bar', '"']]),
            # backslash should not escape most stuff when within double quotes
            ('"foo\\nbar"', [['"', 'foo', ['', '\\n', ''], 'bar', '"']]),
        ]

        def test_with_slices(test, expected, stride):
            # Run one case and also check round-tripping through
            # wordtree_as_string reproduces the input.
            try:
                res = lex_with_slices(test, stride)
                self.assertEqual(res, expected)
                self.assertEqual(wordtree_as_string(res), test)
            except Exception as e:
                # FIX(review): was Python-2-only "except Exception, e"
                import traceback
                traceback.print_exc()
                self.fail('%s for %s with stride=%s' % (str(e), test, str(stride)))

        for test, expected in tests:
            test_with_slices(test, expected, None)
            test_with_slices(test, expected, 1)


    def test_makewordtree(self):
        """make_wordtree must split a word into nested quoting/expansion nodes."""
        wtree = make_wordtree('foo"bar$(baz)" ble\\tch')
        self.assertEqual(wtree, ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' ble', ['\\', 't', ''], 'ch', ''])

        wtree = make_wordtree('$.')
        self.assertEqual(wtree, ['', '$', '.', ''])

        #quotes are not special when parsing here documents
        wtree = make_wordtree('foo"b\'a\'r$(b"a"z)"\\\n b"le\\tch', True)
        self.assertEqual(wtree, ['', 'foo"b\'a\'r', ['$(', 'b', ['"', 'a', '"'], 'z', ')'], '"', '', ' b"le', '\\t', 'ch', ''])

        wtree = make_wordtree('""')
        self.assertEqual(wtree, ['', ['"', '', '"'], ''])

    def test_normalizewordtree(self):
        """normalize_wordtree must flatten redundant nesting and drop empty parts."""
        tests = [
            (['', 'one', ['', 'two', ['"', 'three', '"'], ''], 'four', ['', 'five', ''], ''],
             ['', 'one', 'two', ['"', 'three', '"'], 'four', 'five', '']),
            (['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '', '"'], ' bletch', ''],
             ['', 'foo', ['"', 'bar', ['$(', 'baz', ')'], '"'], ' bletch', '']),
            (['', ['"', '', '"'], ''],
             ['', ['"', '', '"'], ''])
        ]

        for test, expected in tests:
            self.assertEqual(normalize_wordtree(test), expected)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()