PageRenderTime 42ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/IronPython_Main/Languages/IronPython/IronPython/Runtime/NewStringFormatter.cs

#
C# | 601 lines | 358 code | 95 blank | 148 comment | 91 complexity | 656857555fc4882a133bef4bce1efd80 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections.Generic;
  17. using System.Text;
  18. using System.Diagnostics;
  19. using Microsoft.Scripting;
  20. using Microsoft.Scripting.Runtime;
  21. using Microsoft.Scripting.Utils;
  22. using IronPython.Modules;
  23. using IronPython.Runtime.Operations;
  24. namespace IronPython.Runtime {
  25. /// <summary>
  26. /// New string formatter for 'str'.format(...) calls and support for the Formatter
  27. /// library via the _formatter_parser / _formatter_field_name_split
  28. /// methods.
  29. ///
  30. /// We parse this format:
  31. ///
  32. /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  33. /// field_name = (identifier | integer) ("." attribute_name | "[" element_index "]")*
  34. /// attribute_name = identifier
  35. /// element_index = identifier
  36. /// conversion = "r" | "s"
  37. /// format_spec = any char, { must be balanced (for computed values), passed to __format__ method on object
  38. /// </summary>
  39. internal sealed class NewStringFormatter {
  40. private static readonly char[]/*!*/ _brackets = new[] { '{', '}' };
  41. private static readonly char[]/*!*/ _fieldNameEnd = new[] { '{', '}', '!', ':' };
  42. #region Public APIs
  43. /// <summary>
  44. /// Runs the formatting operation on the given format and keyword arguments
  45. /// </summary>
  46. public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
  47. ContractUtils.RequiresNotNull(context, "context");
  48. ContractUtils.RequiresNotNull(format, "format");
  49. ContractUtils.RequiresNotNull(args, "args");
  50. ContractUtils.RequiresNotNull(kwArgs, "kwArgs");
  51. return Formatter.FormatString(context, format, args, kwArgs);
  52. }
  53. /// <summary>
  54. /// Gets the formatting information for the given format. This is a list of tuples. The tuples
  55. /// include:
  56. ///
  57. /// text, field name, format spec, conversion
  58. /// </summary>
  59. public static IEnumerable<PythonTuple/*!*/>/*!*/ GetFormatInfo(string/*!*/ format) {
  60. ContractUtils.RequiresNotNull(format, "format");
  61. return StringFormatParser.Parse(format);
  62. }
  63. /// <summary>
  64. /// Parses a field name returning the argument name and an iterable
  65. /// object which can be used to access the individual attribute
  66. /// or element accesses. The iterator yields tuples of:
  67. ///
  68. /// bool (true if attribute, false if element index), attribute/index value
  69. /// </summary>
  70. public static PythonTuple/*!*/ GetFieldNameInfo(string/*!*/ name) {
  71. ContractUtils.RequiresNotNull(name, "name");
  72. FieldName fieldName = ParseFieldName(name, false);
  73. if (String.IsNullOrEmpty(fieldName.ArgumentName)) {
  74. throw PythonOps.ValueError("empty field name");
  75. }
  76. int val;
  77. object argName = fieldName.ArgumentName;
  78. if (Int32.TryParse(fieldName.ArgumentName, out val)) {
  79. argName = ScriptingRuntimeHelpers.Int32ToObject(val);
  80. }
  81. return PythonTuple.MakeTuple(
  82. argName,
  83. AccessorsToPython(fieldName.Accessors)
  84. );
  85. }
  86. #endregion
  87. #region Parsing
  88. /// <summary>
  89. /// Base class used for parsing the format. Subclasss override Text/ReplacementField methods. Those
  90. /// methods get called when they call Parse and then they can do the appropriate actions for the
  91. /// format.
  92. /// </summary>
  93. private struct StringFormatParser {
  94. private readonly string/*!*/ _str;
  95. private int _index;
  96. private StringFormatParser(string/*!*/ text) {
  97. Assert.NotNull(text);
  98. _str = text;
  99. _index = 0;
  100. }
  101. /// <summary>
  102. /// Gets an enumerable object for walking the parsed format.
  103. ///
  104. /// TODO: object array? struct?
  105. /// </summary>
  106. public static IEnumerable<PythonTuple/*!*/>/*!*/ Parse(string/*!*/ text) {
  107. return new StringFormatParser(text).Parse();
  108. }
  109. /// <summary>
  110. /// Provides an enumerable of the parsed format. The elements of the tuple are:
  111. /// the text preceding the format information
  112. /// the field name
  113. /// the format spec
  114. /// the conversion
  115. /// </summary>
  116. private IEnumerable<PythonTuple/*!*/>/*!*/ Parse() {
  117. int lastTextStart = 0;
  118. while (_index != _str.Length) {
  119. lastTextStart = _index;
  120. _index = _str.IndexOfAny(_brackets, _index);
  121. if (_index == -1) {
  122. // no more formats, send the remaining text.
  123. yield return PythonTuple.MakeTuple(
  124. _str.Substring(lastTextStart, _str.Length - lastTextStart),
  125. null,
  126. null,
  127. null);
  128. break;
  129. }
  130. yield return ParseFormat(lastTextStart);
  131. }
  132. }
  133. private PythonTuple/*!*/ ParseFormat(int lastTextStart) {
  134. // check for {{ or }} and get the text string that we've skipped over
  135. string text;
  136. if (ParseDoubleBracket(lastTextStart, out text)) {
  137. return PythonTuple.MakeTuple(text, null, null, null);
  138. }
  139. int bracketDepth = 1;
  140. char? conversion = null;
  141. string formatSpec = String.Empty;
  142. // all entries have a field name, read it first
  143. string fldName = ParseFieldName(ref bracketDepth);
  144. // check for conversion
  145. bool end = CheckEnd();
  146. if (!end && _str[_index] == '!') {
  147. conversion = ParseConversion();
  148. }
  149. // check for format spec
  150. end = end || CheckEnd();
  151. if (!end && _str[_index] == ':') {
  152. formatSpec = ParseFormatSpec(ref bracketDepth);
  153. }
  154. // verify we hit the end of the format
  155. end = end || CheckEnd();
  156. if (!end) {
  157. throw PythonOps.ValueError("expected ':' after format specifier");
  158. }
  159. // yield the replacement field information
  160. return PythonTuple.MakeTuple(
  161. text,
  162. fldName,
  163. formatSpec,
  164. conversion.HasValue ?
  165. conversion.ToString() :
  166. null
  167. );
  168. }
  169. /// <summary>
  170. /// Handles {{ and }} within the string. Returns true if a double bracket
  171. /// is found and yields the text
  172. /// </summary>
  173. private bool ParseDoubleBracket(int lastTextStart, out string/*!*/ text) {
  174. if (_str[_index] == '}') {
  175. // report the text w/ a single } at the end
  176. _index++;
  177. if (_index == _str.Length || _str[_index] != '}') {
  178. throw PythonOps.ValueError("Single '}}' encountered in format string");
  179. }
  180. text = _str.Substring(lastTextStart, _index - lastTextStart);
  181. _index++;
  182. return true;
  183. } else if (_index == _str.Length - 1) {
  184. throw PythonOps.ValueError("Single '{{' encountered in format string");
  185. } else if (_str[_index + 1] == '{') {
  186. // report the text w/ a single { at the end
  187. text = _str.Substring(lastTextStart, ++_index - lastTextStart);
  188. _index++;
  189. return true;
  190. } else {
  191. // yield the text
  192. text = _str.Substring(lastTextStart, _index++ - lastTextStart);
  193. return false;
  194. }
  195. }
  196. /// <summary>
  197. /// Parses the conversion character and returns it
  198. /// </summary>
  199. private char ParseConversion() {
  200. _index++; // eat the !
  201. if (CheckEnd()) {
  202. throw PythonOps.ValueError("end of format while looking for conversion specifier");
  203. }
  204. return _str[_index++];
  205. }
  206. /// <summary>
  207. /// Checks to see if we're at the end of the format. If there's no more characters left we report
  208. /// the error, otherwise if we hit a } we return true to indicate parsing should stop.
  209. /// </summary>
  210. private bool CheckEnd() {
  211. if (_index == _str.Length) {
  212. throw PythonOps.ValueError("unmatched '{{' in format");
  213. } else if (_str[_index] == '}') {
  214. _index++;
  215. return true;
  216. }
  217. return false;
  218. }
  219. /// <summary>
  220. /// Parses the format spec string and returns it.
  221. /// </summary>
  222. private string/*!*/ ParseFormatSpec(ref int depth) {
  223. _index++; // eat the :
  224. return ParseFieldOrSpecWorker(_brackets, ref depth);
  225. }
  226. /// <summary>
  227. /// Parses the field name and returns it.
  228. /// </summary>
  229. private string/*!*/ ParseFieldName(ref int depth) {
  230. return ParseFieldOrSpecWorker(_fieldNameEnd, ref depth);
  231. }
  232. /// <summary>
  233. /// Handles parsing the field name and the format spec and returns it. At the parse
  234. /// level these are basically the same - field names just have more terminating characters.
  235. ///
  236. /// The most complex part of parsing them is they both allow nested braces and require
  237. /// the braces are matched. Strangely though the braces need to be matched across the
  238. /// combined field and format spec - not within each format.
  239. /// </summary>
  240. private string/*!*/ ParseFieldOrSpecWorker(char[]/*!*/ ends, ref int depth) {
  241. int end = _index - 1;
  242. bool done = false;
  243. do {
  244. end = _str.IndexOfAny(ends, end + 1);
  245. if (end == -1) {
  246. throw PythonOps.ValueError("unmatched '{{' in format");
  247. }
  248. switch (_str[end]) {
  249. case '{': depth++; break;
  250. case '}': depth--; break;
  251. default: done = true; break;
  252. }
  253. } while (!done && depth != 0);
  254. string res = _str.Substring(_index, end - _index);
  255. _index = end;
  256. return res;
  257. }
  258. }
  259. #endregion
  260. #region String Formatter
  261. /// <summary>
  262. /// Provides the built-in string formatter which is exposed to Python via the str.format API.
  263. /// </summary>
  264. private class Formatter {
  265. private readonly PythonContext/*!*/ _context;
  266. private readonly PythonTuple/*!*/ _args;
  267. private readonly IDictionary<object, object>/*!*/ _kwArgs;
  268. private readonly int _depth;
  269. private int _autoNumberedIndex;
  270. private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth)
  271. : this(context, args, kwArgs) {
  272. _depth = depth;
  273. }
  274. private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
  275. Assert.NotNull(context, args, kwArgs);
  276. _context = context;
  277. _args = args;
  278. _kwArgs = kwArgs;
  279. }
  280. public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
  281. Assert.NotNull(context, args, kwArgs, format);
  282. return new Formatter(context, args, kwArgs).ReplaceText(format);
  283. }
  284. public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth) {
  285. Assert.NotNull(context, args, kwArgs, format);
  286. if (depth == 2) {
  287. throw PythonOps.ValueError("Max string recursion exceeded");
  288. }
  289. return new Formatter(context, args, kwArgs, depth).ReplaceText(format);
  290. }
  291. private string ReplaceText(string format) {
  292. StringBuilder builder = new StringBuilder();
  293. foreach (PythonTuple pt in StringFormatParser.Parse(format)) {
  294. string text = (string)pt[0];
  295. string fieldName = (string)pt[1];
  296. string formatSpec = (string)pt[2];
  297. string conversionStr = (string)pt[3];
  298. char? conversion = conversionStr != null && conversionStr.Length > 0 ? conversionStr[0] : (char?)null;
  299. builder.Append(text);
  300. if (fieldName != null) {
  301. // get the argument value
  302. object argValue = GetArgumentValue(ParseFieldName(fieldName, true));
  303. // apply the conversion
  304. argValue = ApplyConversion(conversion, argValue);
  305. // handle computed format specifiers
  306. formatSpec = ReplaceComputedFormats(formatSpec);
  307. // append the string
  308. builder.Append(Builtin.format(_context.SharedContext, argValue, formatSpec));
  309. }
  310. }
  311. return builder.ToString();
  312. }
  313. /// <summary>
  314. /// Inspects a format spec to see if it contains nested format specs which
  315. /// we need to compute. If so runs another string formatter on the format
  316. /// spec to compute those values.
  317. /// </summary>
  318. private string/*!*/ ReplaceComputedFormats(string/*!*/ formatSpec) {
  319. int computeStart = formatSpec.IndexOf('{');
  320. if (computeStart != -1) {
  321. formatSpec = FormatString(
  322. _context,
  323. formatSpec,
  324. _args,
  325. _kwArgs,
  326. _depth + 1
  327. );
  328. }
  329. return formatSpec;
  330. }
  331. /// <summary>
  332. /// Given the field name gets the object from our arguments running
  333. /// any of the member/index accessors.
  334. /// </summary>
  335. private object GetArgumentValue(FieldName fieldName) {
  336. return DoAccessors(fieldName, GetUnaccessedObject(fieldName));
  337. }
  338. /// <summary>
  339. /// Applies the known built-in conversions to the object if a conversion is
  340. /// specified.
  341. /// </summary>
  342. private object ApplyConversion(char? conversion, object argValue) {
  343. switch (conversion) {
  344. case 'r':
  345. argValue = PythonOps.Repr(_context.SharedContext, argValue);
  346. break;
  347. case 's':
  348. argValue = PythonOps.ToString(_context.SharedContext, argValue);
  349. break;
  350. case null:
  351. // no conversion specified
  352. break;
  353. default:
  354. throw PythonOps.ValueError("Unknown conversion specifier {0}", conversion.Value);
  355. }
  356. return argValue;
  357. }
  358. /// <summary>
  359. /// Gets the initial object represented by the field name - e.g. the 0 or
  360. /// keyword name.
  361. /// </summary>
  362. private object GetUnaccessedObject(FieldName fieldName) {
  363. int argIndex;
  364. object argValue;
  365. // get the object
  366. if (fieldName.ArgumentName.Length == 0) {
  367. // auto-numbering of format specifiers
  368. if (_autoNumberedIndex == -1) {
  369. throw PythonOps.ValueError("cannot switch from manual field specification to automatic field numbering");
  370. }
  371. argValue = _args[_autoNumberedIndex++];
  372. } else if (Int32.TryParse(fieldName.ArgumentName, out argIndex)) {
  373. if (_autoNumberedIndex > 0) {
  374. throw PythonOps.ValueError("cannot switch from automatic field numbering to manual field specification");
  375. }
  376. _autoNumberedIndex = -1;
  377. argValue = _args[argIndex];
  378. } else {
  379. argValue = _kwArgs[fieldName.ArgumentName];
  380. }
  381. return argValue;
  382. }
  383. /// <summary>
  384. /// Given the object value runs the accessors in the field name (if any) against the object.
  385. /// </summary>
  386. private object DoAccessors(FieldName fieldName, object argValue) {
  387. foreach (FieldAccessor accessor in fieldName.Accessors) {
  388. // then do any accesses against the object
  389. int intVal;
  390. if (accessor.IsField) {
  391. argValue = PythonOps.GetBoundAttr(
  392. _context.SharedContext,
  393. argValue,
  394. accessor.AttributeName
  395. );
  396. } else if (Int32.TryParse(accessor.AttributeName, out intVal)) {
  397. argValue = PythonOps.GetIndex(
  398. _context.SharedContext,
  399. argValue,
  400. ScriptingRuntimeHelpers.Int32ToObject(intVal)
  401. );
  402. } else {
  403. argValue = PythonOps.GetIndex(
  404. _context.SharedContext,
  405. argValue,
  406. accessor.AttributeName
  407. );
  408. }
  409. }
  410. return argValue;
  411. }
  412. }
  413. #endregion
  414. #region Parser helper functions
  415. /// <summary>
  416. /// Parses the field name including attribute access or element indexing.
  417. /// </summary>
  418. private static FieldName ParseFieldName(string/*!*/ str, bool reportErrors) {
  419. // (identifier | integer) ("." attribute_name | "[" element_index "]")*
  420. int index = 0;
  421. string arg = ParseIdentifier(str, false, ref index);
  422. return new FieldName(arg, ParseFieldAccessors(str, index, reportErrors));
  423. }
  424. /// <summary>
  425. /// Parses the field name including attribute access or element indexing.
  426. /// </summary>
  427. private static IEnumerable<FieldAccessor> ParseFieldAccessors(string/*!*/ str, int index, bool reportErrors) {
  428. // (identifier | integer) ("." attribute_name | "[" element_index "]")*
  429. while (index != str.Length && str[index] != '}') {
  430. char accessType = str[index];
  431. if (accessType == '.' || accessType == '[') {
  432. index++;
  433. bool isIndex = accessType == '[';
  434. string identifier = ParseIdentifier(str, isIndex, ref index);
  435. if (isIndex) {
  436. if (index == str.Length || str[index] != ']') {
  437. throw PythonOps.ValueError("Missing ']' in format string");
  438. }
  439. index++;
  440. }
  441. if (identifier.Length == 0) {
  442. throw PythonOps.ValueError("Empty attribute in format string");
  443. }
  444. yield return new FieldAccessor(identifier, !isIndex);
  445. } else {
  446. if (reportErrors) {
  447. throw PythonOps.ValueError("Only '.' and '[' are valid in format field specifier, got {0}", accessType);
  448. } else {
  449. break;
  450. }
  451. }
  452. }
  453. }
  454. /// <summary>
  455. /// Converts accessors from our internal structure into a PythonTuple matching how CPython
  456. /// exposes these
  457. /// </summary>
  458. private static IEnumerable<PythonTuple> AccessorsToPython(IEnumerable<FieldAccessor> accessors) {
  459. foreach (FieldAccessor accessor in accessors) {
  460. int val;
  461. object attrName = accessor.AttributeName;
  462. if (Int32.TryParse(accessor.AttributeName, out val)) {
  463. attrName = ScriptingRuntimeHelpers.Int32ToObject(val);
  464. }
  465. yield return PythonTuple.MakeTuple(
  466. ScriptingRuntimeHelpers.BooleanToObject(accessor.IsField),
  467. attrName
  468. );
  469. }
  470. }
  471. /// <summary>
  472. /// Parses an identifier and returns it
  473. /// </summary>
  474. private static string/*!*/ ParseIdentifier(string/*!*/ str, bool isIndex, ref int index) {
  475. int start = index;
  476. while (index < str.Length && str[index] != '.' && (isIndex || str[index] != '[') && (!isIndex || str[index] != ']')) {
  477. index++;
  478. }
  479. return str.Substring(start, index - start);
  480. }
  481. /// <summary>
  482. /// Encodes all the information about the field name.
  483. /// </summary>
  484. private struct FieldName {
  485. public readonly string/*!*/ ArgumentName;
  486. public readonly IEnumerable<FieldAccessor>/*!*/ Accessors;
  487. public FieldName(string/*!*/ argumentName, IEnumerable<FieldAccessor>/*!*/ accessors) {
  488. Assert.NotNull(argumentName, accessors);
  489. ArgumentName = argumentName;
  490. Accessors = accessors;
  491. }
  492. }
  493. /// <summary>
  494. /// Encodes a single field accessor (.b or [number] or [str])
  495. /// </summary>
  496. private struct FieldAccessor {
  497. public readonly string AttributeName;
  498. public readonly bool IsField;
  499. public FieldAccessor(string attributeName, bool isField) {
  500. AttributeName = attributeName;
  501. IsField = isField;
  502. }
  503. }
  504. #endregion
  505. }
  506. }