/trunk/Source/Nito.KitchenSink.OptionParsing/ConsoleCommandLineLexer.cs

# · C# · 306 lines · 210 code · 29 blank · 67 comment · 38 complexity · 24b9e081ce7c0a39428e933b43d7ace6 MD5 · raw file

  1. // <copyright file="ConsoleCommandLineLexer.cs" company="Nito Programs">
  2. // Copyright (c) 2011 Nito Programs.
  3. // </copyright>
  4. using System;
  5. using System.Collections.Generic;
  6. using System.Diagnostics.Contracts;
  7. using System.Linq;
  8. using System.Text;
  9. namespace Nito.KitchenSink.OptionParsing
  10. {
  /// <summary>
  /// Provides the default Win32 Console command-line lexing, plus the inverse operation (quoting arguments into a command line).
  /// </summary>
  public static class ConsoleCommandLineLexer
  {
      /// <summary>
      /// The states of the lexer state machine used by <see cref="Lex(string)"/>.
      /// </summary>
      private enum LexerState
      {
          /// <summary>
          /// The default state; no data exists in the argument character buffer.
          /// </summary>
          Default,

          /// <summary>
          /// An argument has been started.
          /// </summary>
          Argument,

          /// <summary>
          /// A quote character has been seen, and we are now parsing quoted data.
          /// </summary>
          Quoted,

          /// <summary>
          /// The quote has just been closed, but the argument is still being parsed.
          /// </summary>
          EndQuotedArgument,
      }

      /// <summary>
      /// A string buffer combined with a count of pending backslashes. Backslashes are held back (only counted) until the
      /// following character is known, because their meaning depends on whether they precede a double-quote.
      /// </summary>
      private sealed class Buffer
      {
          /// <summary>
          /// The characters committed to the current argument so far.
          /// </summary>
          private readonly StringBuilder result = new StringBuilder();

          /// <summary>
          /// The number of backslashes seen since the last committed character.
          /// </summary>
          private int backslashes;

          /// <summary>
          /// Adds any outstanding backslashes to the result literally, and resets the backslash count.
          /// </summary>
          private void Normalize()
          {
              this.result.Append('\\', this.backslashes);
              this.backslashes = 0;
          }

          /// <summary>
          /// Appends a character to the buffer. If the character is a double-quote, it is treated like an ordinary character. The character may not be a backslash.
          /// </summary>
          /// <param name="ch">The character. May not be a backslash.</param>
          public void AppendNormalChar(char ch)
          {
              Contract.Requires(ch != '\\');

              // Pending backslashes are not followed by a quote being processed via AppendQuote, so they are literal.
              this.Normalize();
              this.result.Append(ch);
          }

          /// <summary>
          /// Appends a backslash to the buffer. The backslash is only counted here; it is committed once the next character is known.
          /// </summary>
          public void AppendBackslash()
          {
              ++this.backslashes;
          }

          /// <summary>
          /// Processes a double-quote, which may add it to the buffer. Each pair of pending backslashes becomes one literal backslash.
          /// Returns <c>true</c> if there were an even number of backslashes.
          /// </summary>
          /// <returns><c>true</c> if there were an even number of backslashes (the quote was not escaped and was not added to the buffer); <c>false</c> if the quote was escaped and added to the buffer.</returns>
          public bool AppendQuote()
          {
              this.result.Append('\\', this.backslashes / 2);
              bool escaped = (this.backslashes % 2) != 0;
              this.backslashes = 0;
              if (escaped)
              {
                  // An odd number of backslashes means the double-quote is escaped.
                  this.result.Append('"');
              }

              return !escaped;
          }

          /// <summary>
          /// Appends a regular character or backslash to the buffer.
          /// </summary>
          /// <param name="ch">The character to append. May not be a double quote.</param>
          public void AppendChar(char ch)
          {
              Contract.Requires(ch != '"');
              if (ch == '\\')
              {
                  this.AppendBackslash();
              }
              else
              {
                  this.AppendNormalChar(ch);
              }
          }

          /// <summary>
          /// Consumes the buffer so far, resetting the buffer and backslash count. Pending backslashes are included literally.
          /// </summary>
          /// <returns>The buffer.</returns>
          public string Consume()
          {
              this.Normalize();
              string ret = this.result.ToString();
              this.result.Clear();
              return ret;
          }
      }

      /// <summary>
      /// Determines whether a character separates arguments (outside of quotes). Only space and tab are separators.
      /// </summary>
      /// <param name="ch">The character to test.</param>
      /// <returns><c>true</c> if the character is a space or a tab.</returns>
      private static bool IsWhitespace(char ch)
      {
          return ch == ' ' || ch == '\t';
      }

      /// <summary>
      /// Lexes the command line, using the same rules as <see cref="Environment.GetCommandLineArgs"/>.
      /// </summary>
      /// <param name="commandLine">The command line to parse.</param>
      /// <returns>The lexed command line. The sequence is produced lazily as it is enumerated.</returns>
      public static IEnumerable<string> Lex(this string commandLine)
      {
          Contract.Requires(commandLine != null);
          Contract.Ensures(Contract.Result<IEnumerable<string>>() != null);

          // The MSDN information for <see cref="Environment.GetCommandLineArgs"/> is incomplete.
          // This blog post fills in the gaps: http://www.hardtoc.com/archives/162 (webcite: http://www.webcitation.org/62LHTVelJ )
          LexerState state = LexerState.Default;
          Buffer buffer = new Buffer();
          foreach (var ch in commandLine)
          {
              switch (state)
              {
                  case LexerState.Default:
                      if (ch == '"')
                      {
                          // Enter the quoted state, without placing anything in the buffer.
                          state = LexerState.Quoted;
                          break;
                      }

                      // Whitespace is ignored.
                      if (IsWhitespace(ch))
                      {
                          break;
                      }

                      buffer.AppendChar(ch);
                      state = LexerState.Argument;
                      break;

                  case LexerState.Argument:
                      // We have an argument started, though it may be just an empty string for now.
                      if (ch == '"')
                      {
                          // Handle the special rules for any backslashes preceding a double-quote.
                          if (buffer.AppendQuote())
                          {
                              // An even number of backslashes means that this is a normal double-quote.
                              state = LexerState.Quoted;
                          }

                          break;
                      }

                      if (IsWhitespace(ch))
                      {
                          // Whitespace ends this argument, so publish it and restart in the default state.
                          yield return buffer.Consume();
                          state = LexerState.Default;
                          break;
                      }

                      // Count backslashes; put other characters directly into the buffer.
                      buffer.AppendChar(ch);
                      break;

                  case LexerState.Quoted:
                      // We are within quotes, but may already have characters in the argument buffer.
                      if (ch == '"')
                      {
                          // Handle the special rules for any backslashes preceding a double-quote.
                          if (buffer.AppendQuote())
                          {
                              // An even number of backslashes means that this is a normal double-quote.
                              state = LexerState.EndQuotedArgument;
                          }

                          break;
                      }

                      // Any non-quote character (including whitespace) is appended to the argument buffer.
                      buffer.AppendChar(ch);
                      break;

                  case LexerState.EndQuotedArgument:
                      // This is a special state that is treated like Argument or Quoted depending on whether the next character is a quote. It's not possible to stay in this state.
                      if (ch == '"')
                      {
                          // We just read a double double-quote within a quoted context, so we add the quote to the buffer and re-enter the quoted state.
                          buffer.AppendNormalChar(ch);
                          state = LexerState.Quoted;
                      }
                      else if (IsWhitespace(ch))
                      {
                          // In this case, the double-quote we just read did in fact end the quotation, so we publish the argument and restart in the default state.
                          yield return buffer.Consume();
                          state = LexerState.Default;
                      }
                      else
                      {
                          // If the double-quote is followed by a non-quote, non-whitespace character, then it's considered a continuation of the argument (leaving the quoted state).
                          buffer.AppendChar(ch);
                          state = LexerState.Argument;
                      }

                      break;
              }
          }

          // If we end in the middle of an argument (or even a quotation), then we just publish what we have.
          if (state != LexerState.Default)
          {
              yield return buffer.Consume();
          }
      }

      /// <summary>
      /// Lexes the command line for this process, using the same rules as <see cref="Environment.GetCommandLineArgs"/>. The returned command line includes the process name.
      /// </summary>
      /// <returns>The lexed command line.</returns>
      public static IEnumerable<string> Lex()
      {
          return Environment.GetCommandLineArgs();
      }

      /// <summary>
      /// Takes a list of arguments to pass to a program, and quotes them. This method does not quote or escape special shell characters (see <see cref="CommandPromptCommandLine"/>).
      /// </summary>
      /// <param name="arguments">The arguments to quote (if necessary) and concatenate into a command line.</param>
      /// <returns>The command line: the escaped arguments joined by single spaces.</returns>
      public static string Escape(IEnumerable<string> arguments)
      {
          Contract.Requires(arguments != null);
          Contract.Ensures(Contract.Result<string>() != null);

          // Escape each argument (if necessary) and join them with spaces.
          return string.Join(" ", arguments.Select(argument => EscapeArgument(argument)));
      }

      /// <summary>
      /// Quotes a single argument if necessary, so that <see cref="Lex(string)"/> reads it back as exactly one argument with the same value.
      /// </summary>
      /// <param name="argument">The argument to escape. May not be <c>null</c>.</param>
      /// <returns>The argument unchanged if it is non-empty and contains no space, tab, or double-quote; otherwise the argument wrapped in double-quotes with embedded quotes and the backslashes preceding any quote escaped.</returns>
      private static string EscapeArgument(string argument)
      {
          Contract.Assume(argument != null);

          // An argument does not need escaping if it does not have any whitespace or quote characters.
          // The empty string must be quoted, otherwise it would vanish from the command line.
          if (argument != string.Empty && !argument.Any(ch => IsWhitespace(ch) || ch == '"'))
          {
              return argument;
          }

          // To escape the argument, wrap it in double-quotes and escape existing double-quotes, doubling any existing escape characters but only if they precede a double-quote.
          var ret = new StringBuilder();
          ret.Append('"');
          int backslashes = 0;
          foreach (var ch in argument)
          {
              if (ch == '\\')
              {
                  // Hold backslashes back until we know whether a double-quote follows them.
                  ++backslashes;
              }
              else if (ch == '"')
              {
                  // Double the pending backslashes so they stay literal, then add one more to escape the quote itself.
                  ret.Append('\\', (2 * backslashes) + 1);
                  backslashes = 0;
                  ret.Append(ch);
              }
              else
              {
                  // Backslashes not followed by a double-quote are literal and need no escaping.
                  ret.Append('\\', backslashes);
                  backslashes = 0;
                  ret.Append(ch);
              }
          }

          // Trailing backslashes are followed by the closing double-quote, so they must be doubled;
          // otherwise an odd run would escape the closing quote and the argument would not terminate.
          ret.Append('\\', 2 * backslashes);
          ret.Append('"');
          return ret.ToString();
      }
  }
  265. }