PageRenderTime 49ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/DICK.B1/IronPython/Runtime/NewStringFormatter.cs

https://bitbucket.org/williamybs/uidipythontool
C# | 590 lines | 348 code | 95 blank | 147 comment | 85 complexity | 53c94531afeb974dc7e2c3990e66426b MD5 | raw file
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections.Generic;
  17. using System.Text;
  18. using System.Diagnostics;
  19. using Microsoft.Scripting;
  20. using Microsoft.Scripting.Runtime;
  21. using Microsoft.Scripting.Utils;
  22. using IronPython.Modules;
  23. using IronPython.Runtime.Operations;
  24. namespace IronPython.Runtime {
  25. /// <summary>
  26. /// New string formatter for 'str'.format(...) calls and support for the Formatter
  27. /// library via the _formatter_parser / _formatter_field_name_split
  28. /// methods.
  29. ///
  30. /// We parse this format:
  31. ///
  32. /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  33. /// field_name = (identifier | integer) ("." attribute_name | "[" element_index "]")*
  34. /// attribute_name = identifier
  35. /// element_index = identifier
  36. /// conversion = "r" | "s"
  37. /// format_spec = any char, { must be balanced (for computed values), passed to __format__ method on object
  38. /// </summary>
  39. internal sealed class NewStringFormatter {
  // The two brace characters; used to locate replacement fields and to scan format specs.
  private static readonly char[]/*!*/ _brackets = { '{', '}' };
  // Characters that stop the scan of a field name: braces, '!' and ':'.
  private static readonly char[]/*!*/ _fieldNameEnd = { '{', '}', '!', ':' };
  42. #region Public APIs
  /// <summary>
  /// Formats <paramref name="format"/> using the supplied positional and keyword arguments.
  /// Every argument is passed through ContractUtils.RequiresNotNull before the work is
  /// handed to the nested Formatter.
  /// </summary>
  /// <param name="context">The Python context used while formatting.</param>
  /// <param name="format">The format string.</param>
  /// <param name="args">Positional arguments.</param>
  /// <param name="kwArgs">Keyword arguments.</param>
  /// <returns>The formatted string.</returns>
  public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
      ContractUtils.RequiresNotNull(context, "context");
      ContractUtils.RequiresNotNull(format, "format");
      ContractUtils.RequiresNotNull(args, "args");
      ContractUtils.RequiresNotNull(kwArgs, "kwArgs");

      string formatted = Formatter.FormatString(context, format, args, kwArgs);
      return formatted;
  }
  /// <summary>
  /// Parses <paramref name="format"/> and returns the parsed pieces as tuples of
  ///
  /// text, field name, format spec, conversion
  /// </summary>
  /// <param name="format">The format string to parse; checked with ContractUtils.RequiresNotNull.</param>
  /// <returns>The enumerable produced by StringFormatParser.Parse.</returns>
  public static IEnumerable<PythonTuple/*!*/>/*!*/ GetFormatInfo(string/*!*/ format) {
      ContractUtils.RequiresNotNull(format, "format");

      IEnumerable<PythonTuple> pieces = StringFormatParser.Parse(format);
      return pieces;
  }
  /// <summary>
  /// Parses a field name returning the argument name and an iterable
  /// object which can be used to access the individual attribute
  /// or element accesses. The iterator yields tuples of:
  ///
  /// bool (true if attribute, false if element index), attribute/index value
  ///
  /// The argument name is returned as an integer only when it consists solely of
  /// decimal digits; anything else (including "-1" or " 1") is returned as a string.
  /// </summary>
  /// <param name="name">The field name to split; checked with ContractUtils.RequiresNotNull.</param>
  /// <returns>A two element tuple: the argument name and the accessor enumerable.</returns>
  public static PythonTuple/*!*/ GetFieldNameInfo(string/*!*/ name) {
      ContractUtils.RequiresNotNull(name, "name");

      FieldName fieldName = ParseFieldName(name, false);
      if (String.IsNullOrEmpty(fieldName.ArgumentName)) {
          throw PythonOps.ValueError("empty field name");
      }

      // NumberStyles.None accepts digits only. The two-argument TryParse overload would
      // also accept a sign, surrounding whitespace and culture specific forms, which
      // would turn names such as "-1" into integers.
      int val;
      object argName = fieldName.ArgumentName;
      if (Int32.TryParse(
              fieldName.ArgumentName,
              System.Globalization.NumberStyles.None,
              System.Globalization.CultureInfo.InvariantCulture,
              out val)) {
          argName = ScriptingRuntimeHelpers.Int32ToObject(val);
      }

      return PythonTuple.MakeTuple(
          argName,
          AccessorsToPython(fieldName.Accessors)
      );
  }
  86. #endregion
  87. #region Parsing
  88. /// <summary>
  89. /// Parser for the format string. It walks the string and produces one tuple for each run
  90. /// of literal text and each replacement field. Callers start it through the static Parse
  91. /// method.
  92. /// </summary>
  93. private struct StringFormatParser {
  // The format string being parsed.
  private readonly string/*!*/ _str;
  // Position in _str of the next character to examine.
  private int _index;

  /// <summary>
  /// Creates a parser positioned at the start of <paramref name="text"/>.
  /// </summary>
  private StringFormatParser(string/*!*/ text) {
      Assert.NotNull(text);

      _index = 0;
      _str = text;
  }
  /// <summary>
  /// Creates a parser for <paramref name="text"/> and returns the enumerable that walks
  /// the parsed format.
  ///
  /// TODO: object array? struct?
  /// </summary>
  public static IEnumerable<PythonTuple/*!*/>/*!*/ Parse(string/*!*/ text) {
      StringFormatParser parser = new StringFormatParser(text);
      return parser.Parse();
  }
  /// <summary>
  /// Lazily walks the format string. Each yielded tuple holds, in order:
  ///     the text preceding the format information
  ///     the field name
  ///     the format spec
  ///     the conversion
  /// Trailing text that contains no more braces is yielded with the last three
  /// entries set to null.
  /// </summary>
  private IEnumerable<PythonTuple/*!*/>/*!*/ Parse() {
      while (_index != _str.Length) {
          int textStart = _index;
          _index = _str.IndexOfAny(_brackets, textStart);

          if (_index != -1) {
              // found a brace: let ParseFormat consume it and whatever it introduces
              yield return ParseFormat(textStart);
              continue;
          }

          // no more braces, the rest of the string is plain text
          yield return PythonTuple.MakeTuple(
              _str.Substring(textStart),
              null,
              null,
              null);
          yield break;
      }
  }
  /// <summary>
  /// Parses what follows the brace at the current position. For a doubled brace the
  /// result only carries text; otherwise the field name, optional conversion and
  /// optional format spec are read and returned along with the preceding text.
  /// </summary>
  /// <param name="lastTextStart">Index where the not yet reported literal text begins.</param>
  private PythonTuple/*!*/ ParseFormat(int lastTextStart) {
      // {{ or }}: only text to report
      string text;
      if (ParseDoubleBracket(lastTextStart, out text)) {
          return PythonTuple.MakeTuple(text, null, null, null);
      }

      // every replacement field starts with the field name
      int bracketDepth = 1;
      string fldName = ParseFieldName(ref bracketDepth);

      char? conversion = null;
      string formatSpec = String.Empty;

      // CheckEnd consumes the closing brace, so it is only asked again while the
      // field is still open.
      bool end = CheckEnd();
      if (!end) {
          if (_str[_index] == '!') {
              conversion = ParseConversion();
          }

          end = CheckEnd();
          if (!end) {
              if (_str[_index] == ':') {
                  formatSpec = ParseFormatSpec(ref bracketDepth);
              }

              end = CheckEnd();
          }
      }

      if (!end) {
          throw PythonOps.ValueError("expected ':' after format specifier");
      }

      string conversionText = conversion.HasValue ? conversion.ToString() : null;
      return PythonTuple.MakeTuple(text, fldName, formatSpec, conversionText);
  }
  /// <summary>
  /// Handles the brace at the current position. For {{ or }} the text up to and
  /// including one brace is returned, both braces are consumed and the result is true.
  /// For a single { the text before it is returned, the brace is consumed and the
  /// result is false. A lone } or a { at the very end of the string is an error.
  /// </summary>
  /// <param name="lastTextStart">Index where the pending literal text begins.</param>
  /// <param name="text">Receives the literal text to report.</param>
  private bool ParseDoubleBracket(int lastTextStart, out string/*!*/ text) {
      int bracePos = _index;

      if (_str[bracePos] == '}') {
          // step onto the character after the } - it has to be a second }
          _index = bracePos + 1;
          if (_index == _str.Length || _str[_index] != '}') {
              throw PythonOps.ValueError("Single '}}' encountered in format string");
          }

          // the text keeps one } at its end
          text = _str.Substring(lastTextStart, _index - lastTextStart);
          _index++;
          return true;
      }

      if (bracePos == _str.Length - 1) {
          throw PythonOps.ValueError("Single '{{' encountered in format string");
      }

      bool escaped = _str[bracePos + 1] == '{';

      // an escaped {{ contributes one { to the text, a field opener contributes nothing
      int textLength = bracePos - lastTextStart + (escaped ? 1 : 0);
      text = _str.Substring(lastTextStart, textLength);

      // skip both braces of {{, or just the opening brace of a field
      _index = bracePos + (escaped ? 2 : 1);
      return escaped;
  }
  /// <summary>
  /// Skips the '!' at the current position and returns the character after it,
  /// advancing past that character as well. It is an error when the field ends
  /// right after the '!'.
  /// </summary>
  private char ParseConversion() {
      _index += 1; // step over the !

      bool fieldClosed = CheckEnd();
      if (fieldClosed) {
          throw PythonOps.ValueError("end of format while looking for conversion specifier");
      }

      char conversion = _str[_index];
      _index += 1;
      return conversion;
  }
  /// <summary>
  /// Tests whether the replacement field ends at the current position. Running out of
  /// characters is reported as an error. A } is consumed and true is returned; for any
  /// other character the position is left untouched and false is returned.
  /// </summary>
  private bool CheckEnd() {
      if (_index == _str.Length) {
          throw PythonOps.ValueError("unmatched '{{' in format");
      }

      if (_str[_index] != '}') {
          return false;
      }

      _index++;
      return true;
  }
  /// <summary>
  /// Skips the ':' at the current position and reads the format spec that follows,
  /// scanning with the brace characters as terminators.
  /// </summary>
  /// <param name="depth">Running brace depth shared with the field name scan.</param>
  private string/*!*/ ParseFormatSpec(ref int depth) {
      _index += 1; // step over the :

      string spec = ParseFieldOrSpecWorker(_brackets, ref depth);
      return spec;
  }
  /// <summary>
  /// Reads the field name starting at the current position, scanning with the
  /// field name terminators (braces, '!' and ':').
  /// </summary>
  /// <param name="depth">Running brace depth shared with the format spec scan.</param>
  private string/*!*/ ParseFieldName(ref int depth) {
      string fieldName = ParseFieldOrSpecWorker(_fieldNameEnd, ref depth);
      return fieldName;
  }
  /// <summary>
  /// Shared scanner for field names and format specs - they only differ in the set of
  /// terminating characters.
  ///
  /// Both may contain nested braces. The brace depth is tracked through
  /// <paramref name="depth"/> so that it carries over from the field name to the
  /// format spec: braces are balanced across the two combined, not within each one.
  ///
  /// The scan stops at the first terminator that is not a brace, or at the brace that
  /// brings the depth to zero. The position is left on that character and the text
  /// before it is returned. Running out of characters is an error.
  /// </summary>
  private string/*!*/ ParseFieldOrSpecWorker(char[]/*!*/ ends, ref int depth) {
      int start = _index;
      int pos = start;

      while (true) {
          pos = _str.IndexOfAny(ends, pos);
          if (pos == -1) {
              throw PythonOps.ValueError("unmatched '{{' in format");
          }

          char hit = _str[pos];
          if (hit == '{') {
              depth++;
          } else if (hit == '}') {
              depth--;
          } else {
              // a non-brace terminator always ends the scan
              break;
          }

          if (depth == 0) {
              break;
          }

          // still nested, keep looking after this brace
          pos++;
      }

      _index = pos;
      return _str.Substring(start, pos - start);
  }
  258. }
  259. #endregion
  260. #region String Formatter
  261. /// <summary>
  262. /// Provides the built-in string formatter which is exposed to Python via the str.format API.
  263. /// </summary>
  264. private class Formatter {
  // Context, positional arguments and keyword arguments this formatter works with.
  private readonly PythonContext/*!*/ _context;
  private readonly PythonTuple/*!*/ _args;
  private readonly IDictionary<object, object>/*!*/ _kwArgs;
  // Nesting level of this formatter; 0 unless a depth is passed to the constructor.
  private readonly int _depth;

  /// <summary>
  /// Creates a formatter at nesting depth 0.
  /// </summary>
  private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs)
      : this(context, args, kwArgs, 0) {
  }

  /// <summary>
  /// Creates a formatter at the given nesting depth.
  /// </summary>
  private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth) {
      Assert.NotNull(context, args, kwArgs);

      _context = context;
      _args = args;
      _kwArgs = kwArgs;
      _depth = depth;
  }
  /// <summary>
  /// Formats <paramref name="format"/> with a new formatter built from the given
  /// context and arguments.
  /// </summary>
  public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
      Assert.NotNull(context, args, kwArgs, format);

      Formatter formatter = new Formatter(context, args, kwArgs);
      return formatter.ReplaceText(format);
  }
  /// <summary>
  /// Formats <paramref name="format"/> with a new formatter created at the given nesting
  /// depth. A depth of 2 is rejected with a "Max string recursion exceeded" error.
  /// </summary>
  public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth) {
      Assert.NotNull(context, args, kwArgs, format);

      if (depth != 2) {
          Formatter nested = new Formatter(context, args, kwArgs, depth);
          return nested.ReplaceText(format);
      }

      throw PythonOps.ValueError("Max string recursion exceeded");
  }
  /// <summary>
  /// Builds the formatted result: literal text is copied as is, and each replacement
  /// field is resolved to its argument, converted, and formatted with its (possibly
  /// computed) format spec through Builtin.format.
  /// </summary>
  private string ReplaceText(string format) {
      StringBuilder output = new StringBuilder();

      foreach (PythonTuple piece in StringFormatParser.Parse(format)) {
          // tuple layout: text, field name, format spec, conversion
          output.Append((string)piece[0]);

          string fieldName = (string)piece[1];
          if (fieldName == null) {
              // plain text only
              continue;
          }

          char? conversion = null;
          string conversionStr = (string)piece[3];
          if (!String.IsNullOrEmpty(conversionStr)) {
              conversion = conversionStr[0];
          }

          // look up the argument, then apply the conversion to it
          object argValue = GetArgumentValue(ParseFieldName(fieldName, true));
          argValue = ApplyConversion(conversion, argValue);

          // expand nested fields in the format spec before using it
          string formatSpec = ReplaceComputedFormats((string)piece[2]);

          output.Append(Builtin.format(_context.SharedContext, argValue, formatSpec));
      }

      return output.ToString();
  }
  /// <summary>
  /// Expands nested replacement fields inside a format spec. A spec without a '{' is
  /// returned unchanged; otherwise the spec itself is formatted with the same
  /// arguments at one nesting level deeper.
  /// </summary>
  private string/*!*/ ReplaceComputedFormats(string/*!*/ formatSpec) {
      if (formatSpec.IndexOf('{') == -1) {
          return formatSpec;
      }

      return FormatString(_context, formatSpec, _args, _kwArgs, _depth + 1);
  }
  /// <summary>
  /// Resolves a field name to a value: fetches the argument it names and then applies
  /// the field's member/index accessors to it.
  /// </summary>
  private object GetArgumentValue(FieldName fieldName) {
      object root = GetUnaccessedObject(fieldName);
      return DoAccessors(fieldName, root);
  }
  /// <summary>
  /// Applies the requested conversion to the value: 'r' goes through PythonOps.Repr,
  /// 's' through PythonOps.ToString, and no conversion returns the value unchanged.
  /// Any other conversion character is an error.
  /// </summary>
  private object ApplyConversion(char? conversion, object argValue) {
      if (!conversion.HasValue) {
          // no conversion specified
          return argValue;
      }

      char kind = conversion.Value;
      if (kind == 'r') {
          return PythonOps.Repr(_context.SharedContext, argValue);
      }
      if (kind == 's') {
          return PythonOps.ToString(_context.SharedContext, argValue);
      }

      throw PythonOps.ValueError("Unknown conversion specifier {0}", kind);
  }
  /// <summary>
  /// Gets the initial object represented by the field name - e.g. the 0 or
  /// keyword name.
  ///
  /// An argument name made up solely of decimal digits is used as an index into the
  /// positional arguments; every other name (including "-1" or " 1") is looked up in
  /// the keyword arguments.
  /// </summary>
  private object GetUnaccessedObject(FieldName fieldName) {
      // NumberStyles.None accepts digits only. The two-argument TryParse overload would
      // also accept a sign and surrounding whitespace, so "{-1}" would be treated as a
      // positional index instead of a keyword name.
      int argIndex;
      if (Int32.TryParse(
              fieldName.ArgumentName,
              System.Globalization.NumberStyles.None,
              System.Globalization.CultureInfo.InvariantCulture,
              out argIndex)) {
          return _args[argIndex];
      }

      return _kwArgs[fieldName.ArgumentName];
  }
  /// <summary>
  /// Given the object value runs the accessors in the field name (if any) against the object.
  ///
  /// Attribute accessors go through PythonOps.GetBoundAttr. Element accessors go through
  /// PythonOps.GetIndex, with an integer index when the element text consists solely of
  /// decimal digits and with the text itself as the index otherwise.
  /// </summary>
  private object DoAccessors(FieldName fieldName, object argValue) {
      foreach (FieldAccessor accessor in fieldName.Accessors) {
          int intVal;
          if (accessor.IsField) {
              argValue = PythonOps.GetBoundAttr(
                  _context.SharedContext,
                  argValue,
                  accessor.AttributeName
              );
          } else if (Int32.TryParse(
                         accessor.AttributeName,
                         System.Globalization.NumberStyles.None,
                         System.Globalization.CultureInfo.InvariantCulture,
                         out intVal)) {
              // digits only: NumberStyles.None rejects signs and whitespace, so "[-1]"
              // and "[ 1]" fall through to the string index below
              argValue = PythonOps.GetIndex(
                  _context.SharedContext,
                  argValue,
                  ScriptingRuntimeHelpers.Int32ToObject(intVal)
              );
          } else {
              argValue = PythonOps.GetIndex(
                  _context.SharedContext,
                  argValue,
                  accessor.AttributeName
              );
          }
      }

      return argValue;
  }
  401. }
  402. #endregion
  403. #region Parser helper functions
  /// <summary>
  /// Splits a field name into its leading argument name and the accessors that follow it.
  /// The accessors are parsed lazily, as the returned FieldName's Accessors are enumerated.
  /// </summary>
  /// <param name="str">The field name text.</param>
  /// <param name="reportErrors">Passed on to the accessor parser.</param>
  private static FieldName ParseFieldName(string/*!*/ str, bool reportErrors) {
      // (identifier | integer) ("." attribute_name | "[" element_index "]")*
      int position = 0;
      string argumentName = ParseIdentifier(str, false, ref position);

      IEnumerable<FieldAccessor> accessors = ParseFieldAccessors(str, position, reportErrors);
      return new FieldName(argumentName, accessors);
  }
  /// <summary>
  /// Lazily parses the attribute (".name") and element ("[index]") accessors of a field
  /// name, starting at <paramref name="index"/>. Parsing stops at the end of the string
  /// or at a '}'. When any other unexpected character is found it is either reported as
  /// an error (<paramref name="reportErrors"/> true) or ends the parse silently.
  /// </summary>
  private static IEnumerable<FieldAccessor> ParseFieldAccessors(string/*!*/ str, int index, bool reportErrors) {
      // ("." attribute_name | "[" element_index "]")*
      while (index != str.Length) {
          char accessType = str[index];
          if (accessType == '}') {
              yield break;
          }

          if (accessType != '.' && accessType != '[') {
              if (!reportErrors) {
                  yield break;
              }
              throw PythonOps.ValueError("Only '.' and '[' are valid in format field specifier, got {0}", accessType);
          }

          bool isAttribute = accessType == '.';
          index++; // step over the . or [

          string identifier = ParseIdentifier(str, !isAttribute, ref index);
          if (!isAttribute) {
              // an element index has to be closed by ]
              if (index == str.Length || str[index] != ']') {
                  throw PythonOps.ValueError("Missing ']' in format string");
              }
              index++;
          }

          if (identifier.Length == 0) {
              throw PythonOps.ValueError("Empty attribute in format string");
          }

          yield return new FieldAccessor(identifier, isAttribute);
      }
  }
  /// <summary>
  /// Converts accessors from our internal structure into a PythonTuple matching how CPython
  /// exposes these: (is attribute, attribute name or index). The second element is an
  /// integer only when the accessor text consists solely of decimal digits; otherwise
  /// it is the text itself.
  /// </summary>
  private static IEnumerable<PythonTuple> AccessorsToPython(IEnumerable<FieldAccessor> accessors) {
      foreach (FieldAccessor accessor in accessors) {
          // NumberStyles.None accepts digits only, so values such as "-1" or " 1" stay
          // strings instead of being reported as integers.
          int val;
          object attrName = accessor.AttributeName;
          if (Int32.TryParse(
                  accessor.AttributeName,
                  System.Globalization.NumberStyles.None,
                  System.Globalization.CultureInfo.InvariantCulture,
                  out val)) {
              attrName = ScriptingRuntimeHelpers.Int32ToObject(val);
          }

          yield return PythonTuple.MakeTuple(
              ScriptingRuntimeHelpers.BooleanToObject(accessor.IsField),
              attrName
          );
      }
  }
  /// <summary>
  /// Reads an identifier starting at <paramref name="index"/> and returns it, leaving
  /// <paramref name="index"/> on the character that ended it (or at the end of the string).
  ///
  /// An element index (<paramref name="isIndex"/> true) runs up to the closing ']', so it
  /// may itself contain '.' or '['. An argument or attribute name runs up to the next
  /// '.' or '['.
  /// </summary>
  /// <param name="str">The field name text.</param>
  /// <param name="isIndex">True when reading the contents of a "[...]" element index.</param>
  /// <param name="index">Start position on entry; position of the terminator on exit.</param>
  /// <returns>The identifier text, which may be empty.</returns>
  private static string/*!*/ ParseIdentifier(string/*!*/ str, bool isIndex, ref int index) {
      int start = index;

      while (index < str.Length) {
          char current = str[index];

          // Inside [...] only ']' terminates. Stopping at '.' there would reject
          // keys such as "a.b" with a "Missing ']'" error.
          bool terminator = isIndex
              ? current == ']'
              : current == '.' || current == '[';
          if (terminator) {
              break;
          }

          index++;
      }

      return str.Substring(start, index - start);
  }
  /// <summary>
  /// A parsed field name: the name of the argument it refers to plus the accessors
  /// to apply to that argument.
  /// </summary>
  private struct FieldName {
      // The leading argument name of the field.
      public readonly string/*!*/ ArgumentName;
      // The attribute / element accessors that follow the argument name.
      public readonly IEnumerable<FieldAccessor>/*!*/ Accessors;

      public FieldName(string/*!*/ argumentName, IEnumerable<FieldAccessor>/*!*/ accessors) {
          Assert.NotNull(argumentName, accessors);

          this.Accessors = accessors;
          this.ArgumentName = argumentName;
      }
  }
  /// <summary>
  /// One accessor of a field name (.b or [number] or [str]).
  /// </summary>
  private struct FieldAccessor {
      // True for an attribute access (.b), false for an element access ([x]).
      public readonly bool IsField;
      // The attribute name, or the text of the element index.
      public readonly string AttributeName;

      public FieldAccessor(string attributeName, bool isField) {
          this.IsField = isField;
          this.AttributeName = attributeName;
      }
  }
  493. #endregion
  494. }
  495. }