PageRenderTime 36ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/Lib/test/test_shlex.py

https://gitlab.com/unofficial-mirrors/cpython
Python | 318 lines | 313 code | 4 blank | 1 comment | 0 complexity | 5d2f5f19b5908cb2b98696af57572a8b MD5 | raw file
  1. import io
  2. import shlex
  3. import string
  4. import unittest
  5. # The original test data set was from shellwords, by Hartmut Goebel.
  6. data = r"""x|x|
  7. foo bar|foo|bar|
  8. foo bar|foo|bar|
  9. foo bar |foo|bar|
  10. foo bar bla fasel|foo|bar|bla|fasel|
  11. x y z xxxx|x|y|z|xxxx|
  12. \x bar|\|x|bar|
  13. \ x bar|\|x|bar|
  14. \ bar|\|bar|
  15. foo \x bar|foo|\|x|bar|
  16. foo \ x bar|foo|\|x|bar|
  17. foo \ bar|foo|\|bar|
  18. foo "bar" bla|foo|"bar"|bla|
  19. "foo" "bar" "bla"|"foo"|"bar"|"bla"|
  20. "foo" bar "bla"|"foo"|bar|"bla"|
  21. "foo" bar bla|"foo"|bar|bla|
  22. foo 'bar' bla|foo|'bar'|bla|
  23. 'foo' 'bar' 'bla'|'foo'|'bar'|'bla'|
  24. 'foo' bar 'bla'|'foo'|bar|'bla'|
  25. 'foo' bar bla|'foo'|bar|bla|
  26. blurb foo"bar"bar"fasel" baz|blurb|foo"bar"bar"fasel"|baz|
  27. blurb foo'bar'bar'fasel' baz|blurb|foo'bar'bar'fasel'|baz|
  28. ""|""|
  29. ''|''|
  30. foo "" bar|foo|""|bar|
  31. foo '' bar|foo|''|bar|
  32. foo "" "" "" bar|foo|""|""|""|bar|
  33. foo '' '' '' bar|foo|''|''|''|bar|
  34. \""|\|""|
  35. "\"|"\"|
  36. "foo\ bar"|"foo\ bar"|
  37. "foo\\ bar"|"foo\\ bar"|
  38. "foo\\ bar\"|"foo\\ bar\"|
  39. "foo\\" bar\""|"foo\\"|bar|\|""|
  40. "foo\\ bar\" dfadf"|"foo\\ bar\"|dfadf"|
  41. "foo\\\ bar\" dfadf"|"foo\\\ bar\"|dfadf"|
  42. "foo\\\x bar\" dfadf"|"foo\\\x bar\"|dfadf"|
  43. "foo\x bar\" dfadf"|"foo\x bar\"|dfadf"|
  44. \''|\|''|
  45. 'foo\ bar'|'foo\ bar'|
  46. 'foo\\ bar'|'foo\\ bar'|
  47. "foo\\\x bar\" df'a\ 'df'|"foo\\\x bar\"|df'a|\|'df'|
  48. \"foo"|\|"foo"|
  49. \"foo"\x|\|"foo"|\|x|
  50. "foo\x"|"foo\x"|
  51. "foo\ "|"foo\ "|
  52. foo\ xx|foo|\|xx|
  53. foo\ x\x|foo|\|x|\|x|
  54. foo\ x\x\""|foo|\|x|\|x|\|""|
  55. "foo\ x\x"|"foo\ x\x"|
  56. "foo\ x\x\\"|"foo\ x\x\\"|
  57. "foo\ x\x\\""foobar"|"foo\ x\x\\"|"foobar"|
  58. "foo\ x\x\\"\''"foobar"|"foo\ x\x\\"|\|''|"foobar"|
  59. "foo\ x\x\\"\'"fo'obar"|"foo\ x\x\\"|\|'"fo'|obar"|
  60. "foo\ x\x\\"\'"fo'obar" 'don'\''t'|"foo\ x\x\\"|\|'"fo'|obar"|'don'|\|''|t'|
  61. 'foo\ bar'|'foo\ bar'|
  62. 'foo\\ bar'|'foo\\ bar'|
  63. foo\ bar|foo|\|bar|
  64. foo#bar\nbaz|foobaz|
  65. :-) ;-)|:|-|)|;|-|)|
  66. áéíóú|á|é|í|ó|ú|
  67. """
  68. posix_data = r"""x|x|
  69. foo bar|foo|bar|
  70. foo bar|foo|bar|
  71. foo bar |foo|bar|
  72. foo bar bla fasel|foo|bar|bla|fasel|
  73. x y z xxxx|x|y|z|xxxx|
  74. \x bar|x|bar|
  75. \ x bar| x|bar|
  76. \ bar| bar|
  77. foo \x bar|foo|x|bar|
  78. foo \ x bar|foo| x|bar|
  79. foo \ bar|foo| bar|
  80. foo "bar" bla|foo|bar|bla|
  81. "foo" "bar" "bla"|foo|bar|bla|
  82. "foo" bar "bla"|foo|bar|bla|
  83. "foo" bar bla|foo|bar|bla|
  84. foo 'bar' bla|foo|bar|bla|
  85. 'foo' 'bar' 'bla'|foo|bar|bla|
  86. 'foo' bar 'bla'|foo|bar|bla|
  87. 'foo' bar bla|foo|bar|bla|
  88. blurb foo"bar"bar"fasel" baz|blurb|foobarbarfasel|baz|
  89. blurb foo'bar'bar'fasel' baz|blurb|foobarbarfasel|baz|
  90. ""||
  91. ''||
  92. foo "" bar|foo||bar|
  93. foo '' bar|foo||bar|
  94. foo "" "" "" bar|foo||||bar|
  95. foo '' '' '' bar|foo||||bar|
  96. \"|"|
  97. "\""|"|
  98. "foo\ bar"|foo\ bar|
  99. "foo\\ bar"|foo\ bar|
  100. "foo\\ bar\""|foo\ bar"|
  101. "foo\\" bar\"|foo\|bar"|
  102. "foo\\ bar\" dfadf"|foo\ bar" dfadf|
  103. "foo\\\ bar\" dfadf"|foo\\ bar" dfadf|
  104. "foo\\\x bar\" dfadf"|foo\\x bar" dfadf|
  105. "foo\x bar\" dfadf"|foo\x bar" dfadf|
  106. \'|'|
  107. 'foo\ bar'|foo\ bar|
  108. 'foo\\ bar'|foo\\ bar|
  109. "foo\\\x bar\" df'a\ 'df"|foo\\x bar" df'a\ 'df|
  110. \"foo|"foo|
  111. \"foo\x|"foox|
  112. "foo\x"|foo\x|
  113. "foo\ "|foo\ |
  114. foo\ xx|foo xx|
  115. foo\ x\x|foo xx|
  116. foo\ x\x\"|foo xx"|
  117. "foo\ x\x"|foo\ x\x|
  118. "foo\ x\x\\"|foo\ x\x\|
  119. "foo\ x\x\\""foobar"|foo\ x\x\foobar|
  120. "foo\ x\x\\"\'"foobar"|foo\ x\x\'foobar|
  121. "foo\ x\x\\"\'"fo'obar"|foo\ x\x\'fo'obar|
  122. "foo\ x\x\\"\'"fo'obar" 'don'\''t'|foo\ x\x\'fo'obar|don't|
  123. "foo\ x\x\\"\'"fo'obar" 'don'\''t' \\|foo\ x\x\'fo'obar|don't|\|
  124. 'foo\ bar'|foo\ bar|
  125. 'foo\\ bar'|foo\\ bar|
  126. foo\ bar|foo bar|
  127. foo#bar\nbaz|foo|baz|
  128. :-) ;-)|:-)|;-)|
  129. áéíóú|áéíóú|
  130. """
  131. class ShlexTest(unittest.TestCase):
  132. def setUp(self):
  133. self.data = [x.split("|")[:-1]
  134. for x in data.splitlines()]
  135. self.posix_data = [x.split("|")[:-1]
  136. for x in posix_data.splitlines()]
  137. for item in self.data:
  138. item[0] = item[0].replace(r"\n", "\n")
  139. for item in self.posix_data:
  140. item[0] = item[0].replace(r"\n", "\n")
  141. def splitTest(self, data, comments):
  142. for i in range(len(data)):
  143. l = shlex.split(data[i][0], comments=comments)
  144. self.assertEqual(l, data[i][1:],
  145. "%s: %s != %s" %
  146. (data[i][0], l, data[i][1:]))
  147. def oldSplit(self, s):
  148. ret = []
  149. lex = shlex.shlex(io.StringIO(s))
  150. tok = lex.get_token()
  151. while tok:
  152. ret.append(tok)
  153. tok = lex.get_token()
  154. return ret
  155. def testSplitPosix(self):
  156. """Test data splitting with posix parser"""
  157. self.splitTest(self.posix_data, comments=True)
  158. def testCompat(self):
  159. """Test compatibility interface"""
  160. for i in range(len(self.data)):
  161. l = self.oldSplit(self.data[i][0])
  162. self.assertEqual(l, self.data[i][1:],
  163. "%s: %s != %s" %
  164. (self.data[i][0], l, self.data[i][1:]))
  165. def testSyntaxSplitAmpersandAndPipe(self):
  166. """Test handling of syntax splitting of &, |"""
  167. # Could take these forms: &&, &, |&, ;&, ;;&
  168. # of course, the same applies to | and ||
  169. # these should all parse to the same output
  170. for delimiter in ('&&', '&', '|&', ';&', ';;&',
  171. '||', '|', '&|', ';|', ';;|'):
  172. src = ['echo hi %s echo bye' % delimiter,
  173. 'echo hi%secho bye' % delimiter]
  174. ref = ['echo', 'hi', delimiter, 'echo', 'bye']
  175. for ss in src:
  176. s = shlex.shlex(ss, punctuation_chars=True)
  177. result = list(s)
  178. self.assertEqual(ref, result, "While splitting '%s'" % ss)
  179. def testSyntaxSplitSemicolon(self):
  180. """Test handling of syntax splitting of ;"""
  181. # Could take these forms: ;, ;;, ;&, ;;&
  182. # these should all parse to the same output
  183. for delimiter in (';', ';;', ';&', ';;&'):
  184. src = ['echo hi %s echo bye' % delimiter,
  185. 'echo hi%s echo bye' % delimiter,
  186. 'echo hi%secho bye' % delimiter]
  187. ref = ['echo', 'hi', delimiter, 'echo', 'bye']
  188. for ss in src:
  189. s = shlex.shlex(ss, punctuation_chars=True)
  190. result = list(s)
  191. self.assertEqual(ref, result, "While splitting '%s'" % ss)
  192. def testSyntaxSplitRedirect(self):
  193. """Test handling of syntax splitting of >"""
  194. # of course, the same applies to <, |
  195. # these should all parse to the same output
  196. for delimiter in ('<', '|'):
  197. src = ['echo hi %s out' % delimiter,
  198. 'echo hi%s out' % delimiter,
  199. 'echo hi%sout' % delimiter]
  200. ref = ['echo', 'hi', delimiter, 'out']
  201. for ss in src:
  202. s = shlex.shlex(ss, punctuation_chars=True)
  203. result = list(s)
  204. self.assertEqual(ref, result, "While splitting '%s'" % ss)
  205. def testSyntaxSplitParen(self):
  206. """Test handling of syntax splitting of ()"""
  207. # these should all parse to the same output
  208. src = ['( echo hi )',
  209. '(echo hi)']
  210. ref = ['(', 'echo', 'hi', ')']
  211. for ss in src:
  212. s = shlex.shlex(ss, punctuation_chars=True)
  213. result = list(s)
  214. self.assertEqual(ref, result, "While splitting '%s'" % ss)
  215. def testSyntaxSplitCustom(self):
  216. """Test handling of syntax splitting with custom chars"""
  217. ref = ['~/a', '&', '&', 'b-c', '--color=auto', '||', 'd', '*.py?']
  218. ss = "~/a && b-c --color=auto || d *.py?"
  219. s = shlex.shlex(ss, punctuation_chars="|")
  220. result = list(s)
  221. self.assertEqual(ref, result, "While splitting '%s'" % ss)
  222. def testTokenTypes(self):
  223. """Test that tokens are split with types as expected."""
  224. for source, expected in (
  225. ('a && b || c',
  226. [('a', 'a'), ('&&', 'c'), ('b', 'a'),
  227. ('||', 'c'), ('c', 'a')]),
  228. ):
  229. s = shlex.shlex(source, punctuation_chars=True)
  230. observed = []
  231. while True:
  232. t = s.get_token()
  233. if t == s.eof:
  234. break
  235. if t[0] in s.punctuation_chars:
  236. tt = 'c'
  237. else:
  238. tt = 'a'
  239. observed.append((t, tt))
  240. self.assertEqual(observed, expected)
  241. def testPunctuationInWordChars(self):
  242. """Test that any punctuation chars are removed from wordchars"""
  243. s = shlex.shlex('a_b__c', punctuation_chars='_')
  244. self.assertNotIn('_', s.wordchars)
  245. self.assertEqual(list(s), ['a', '_', 'b', '__', 'c'])
  246. def testPunctuationWithWhitespaceSplit(self):
  247. """Test that with whitespace_split, behaviour is as expected"""
  248. s = shlex.shlex('a && b || c', punctuation_chars='&')
  249. # whitespace_split is False, so splitting will be based on
  250. # punctuation_chars
  251. self.assertEqual(list(s), ['a', '&&', 'b', '|', '|', 'c'])
  252. s = shlex.shlex('a && b || c', punctuation_chars='&')
  253. s.whitespace_split = True
  254. # whitespace_split is True, so splitting will be based on
  255. # white space
  256. self.assertEqual(list(s), ['a', '&&', 'b', '||', 'c'])
  257. def testPunctuationWithPosix(self):
  258. """Test that punctuation_chars and posix behave correctly together."""
  259. # see Issue #29132
  260. s = shlex.shlex('f >"abc"', posix=True, punctuation_chars=True)
  261. self.assertEqual(list(s), ['f', '>', 'abc'])
  262. s = shlex.shlex('f >\\"abc\\"', posix=True, punctuation_chars=True)
  263. self.assertEqual(list(s), ['f', '>', '"abc"'])
  264. def testEmptyStringHandling(self):
  265. """Test that parsing of empty strings is correctly handled."""
  266. # see Issue #21999
  267. expected = ['', ')', 'abc']
  268. for punct in (False, True):
  269. s = shlex.shlex("'')abc", posix=True, punctuation_chars=punct)
  270. slist = list(s)
  271. self.assertEqual(slist, expected)
  272. expected = ["''", ')', 'abc']
  273. s = shlex.shlex("'')abc", punctuation_chars=True)
  274. self.assertEqual(list(s), expected)
  275. def testQuote(self):
  276. safeunquoted = string.ascii_letters + string.digits + '@%_-+=:,./'
  277. unicode_sample = '\xe9\xe0\xdf' # e + acute accent, a + grave, sharp s
  278. unsafe = '"`$\\!' + unicode_sample
  279. self.assertEqual(shlex.quote(''), "''")
  280. self.assertEqual(shlex.quote(safeunquoted), safeunquoted)
  281. self.assertEqual(shlex.quote('test file name'), "'test file name'")
  282. for u in unsafe:
  283. self.assertEqual(shlex.quote('test%sname' % u),
  284. "'test%sname'" % u)
  285. for u in unsafe:
  286. self.assertEqual(shlex.quote("test%s'name'" % u),
  287. "'test%s'\"'\"'name'\"'\"''" % u)
  288. # Allow this test to be used with old shlex.py
  289. if not getattr(shlex, "split", None):
  290. for methname in dir(ShlexTest):
  291. if methname.startswith("test") and methname != "testCompat":
  292. delattr(ShlexTest, methname)
  293. if __name__ == "__main__":
  294. unittest.main()