PageRenderTime 33ms CodeModel.GetById 2ms app.highlight 24ms RepoModel.GetById 1ms app.codeStats 1ms

/IronPython_Main/Languages/IronPython/IronPython/Runtime/NewStringFormatter.cs

#
C# | 601 lines | 358 code | 95 blank | 148 comment | 91 complexity | 656857555fc4882a133bef4bce1efd80 MD5 | raw file
  1/* ****************************************************************************
  2 *
  3 * Copyright (c) Microsoft Corporation. 
  4 *
  5 * This source code is subject to terms and conditions of the Apache License, Version 2.0. A 
  6 * copy of the license can be found in the License.html file at the root of this distribution. If 
  7 * you cannot locate the  Apache License, Version 2.0, please send an email to 
  8 * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound 
  9 * by the terms of the Apache License, Version 2.0.
 10 *
 11 * You must not remove this notice, or any other, from this software.
 12 *
 13 *
 14 * ***************************************************************************/
 15
 16using System;
 17using System.Collections.Generic;
 18using System.Text;
 19using System.Diagnostics;
 20
 21using Microsoft.Scripting;
 22using Microsoft.Scripting.Runtime;
 23using Microsoft.Scripting.Utils;
 24
 25using IronPython.Modules;
 26using IronPython.Runtime.Operations;
 27
 28namespace IronPython.Runtime {
 29    /// <summary>
 30    /// New string formatter for 'str'.format(...) calls and support for the Formatter 
 31    /// library via the _formatter_parser / _formatter_field_name_split
 32    /// methods.
 33    /// 
 34    /// We parse this format:
 35    /// 
 36    /// replacement_field =  "{" field_name ["!" conversion] [":" format_spec] "}"
 37    /// field_name        =  (identifier | integer) ("." attribute_name | "[" element_index "]")*
 38    /// attribute_name    =  identifier
 39    /// element_index     =  identifier
 40    /// conversion        =  "r" | "s"
 41    /// format_spec       = any char, { must be balanced (for computed values), passed to __format__ method on object
 42    /// </summary>
 43    internal sealed class NewStringFormatter {
 44        private static readonly char[]/*!*/ _brackets = new[] { '{', '}' };
 45        private static readonly char[]/*!*/ _fieldNameEnd = new[] { '{', '}', '!', ':' };
 46
 47        #region Public APIs
 48
 49        /// <summary>
 50        /// Runs the formatting operation on the given format and keyword arguments
 51        /// </summary>
 52        public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
 53            ContractUtils.RequiresNotNull(context, "context");
 54            ContractUtils.RequiresNotNull(format, "format");
 55            ContractUtils.RequiresNotNull(args, "args");
 56            ContractUtils.RequiresNotNull(kwArgs, "kwArgs");
 57
 58            return Formatter.FormatString(context, format, args, kwArgs);
 59        }
 60
 61        /// <summary>
 62        /// Gets the formatting information for the given format.  This is a list of tuples.  The tuples
 63        /// include:
 64        /// 
 65        /// text, field name, format spec, conversion
 66        /// </summary>
 67        public static IEnumerable<PythonTuple/*!*/>/*!*/ GetFormatInfo(string/*!*/ format) {
 68            ContractUtils.RequiresNotNull(format, "format");
 69
 70            return StringFormatParser.Parse(format);
 71        }
 72
 73        /// <summary>
 74        /// Parses a field name returning the argument name and an iterable
 75        /// object which can be used to access the individual attribute
 76        /// or element accesses.  The iterator yields tuples of:
 77        /// 
 78        /// bool (true if attribute, false if element index), attribute/index value
 79        /// </summary>
 80        public static PythonTuple/*!*/ GetFieldNameInfo(string/*!*/ name) {
 81            ContractUtils.RequiresNotNull(name, "name");
 82
 83
 84            FieldName fieldName = ParseFieldName(name, false);
 85
 86            if (String.IsNullOrEmpty(fieldName.ArgumentName)) {
 87                throw PythonOps.ValueError("empty field name");
 88            }
 89
 90            int val;
 91            object argName = fieldName.ArgumentName;
 92            if (Int32.TryParse(fieldName.ArgumentName, out val)) {
 93                argName = ScriptingRuntimeHelpers.Int32ToObject(val);
 94            }
 95
 96            return PythonTuple.MakeTuple(
 97                argName,
 98                AccessorsToPython(fieldName.Accessors)
 99            );
100        }
101
102        #endregion
103
104        #region Parsing
105
106        /// <summary>
107        /// Base class used for parsing the format.  Subclasss override Text/ReplacementField methods.  Those
108        /// methods get called when they call Parse and then they can do the appropriate actions for the
109        /// format.
110        /// </summary>
111        private struct StringFormatParser {
112            private readonly string/*!*/ _str;
113            private int _index;
114
115            private StringFormatParser(string/*!*/ text) {
116                Assert.NotNull(text);
117
118                _str = text;
119                _index = 0;
120            }
121
122            /// <summary>
123            /// Gets an enumerable object for walking the parsed format.
124            /// 
125            /// TODO: object array?  struct?
126            /// </summary>
127            public static IEnumerable<PythonTuple/*!*/>/*!*/ Parse(string/*!*/ text) {
128                return new StringFormatParser(text).Parse();
129            }
130
131            /// <summary>
132            /// Provides an enumerable of the parsed format.  The elements of the tuple are:
133            ///     the text preceding the format information
134            ///     the field name
135            ///     the format spec
136            ///     the conversion
137            /// </summary>
138            private IEnumerable<PythonTuple/*!*/>/*!*/ Parse() {
139                int lastTextStart = 0;
140                while (_index != _str.Length) {
141                    lastTextStart = _index;
142                    _index = _str.IndexOfAny(_brackets, _index);
143
144                    if (_index == -1) {
145                        // no more formats, send the remaining text.
146                        yield return PythonTuple.MakeTuple(
147                            _str.Substring(lastTextStart, _str.Length - lastTextStart),
148                            null,
149                            null,
150                            null);
151
152                        break;
153                    }
154
155                    yield return ParseFormat(lastTextStart);
156                }
157            }
158
159            private PythonTuple/*!*/ ParseFormat(int lastTextStart) {
160                // check for {{ or }} and get the text string that we've skipped over
161                string text;
162                if (ParseDoubleBracket(lastTextStart, out text)) {
163                    return PythonTuple.MakeTuple(text, null, null, null);
164                }
165
166                int bracketDepth = 1;
167                char? conversion = null;
168                string formatSpec = String.Empty;
169
170                // all entries have a field name, read it first
171                string fldName = ParseFieldName(ref bracketDepth);
172
173                // check for conversion
174                bool end = CheckEnd();
175                if (!end && _str[_index] == '!') {
176                    conversion = ParseConversion();
177                }
178
179                // check for format spec
180                end = end || CheckEnd();
181                if (!end && _str[_index] == ':') {
182                    formatSpec = ParseFormatSpec(ref bracketDepth);
183                }
184
185                // verify we hit the end of the format
186                end = end || CheckEnd();
187                if (!end) {
188                    throw PythonOps.ValueError("expected ':' after format specifier");
189                }
190
191                // yield the replacement field information
192                return PythonTuple.MakeTuple(
193                    text,
194                    fldName,
195                    formatSpec,
196                    conversion.HasValue ?
197                        conversion.ToString() :
198                        null
199                );
200            }
201
202            /// <summary>
203            /// Handles {{ and }} within the string.  Returns true if a double bracket
204            /// is found and yields the text
205            /// </summary>
206            private bool ParseDoubleBracket(int lastTextStart, out string/*!*/ text) {
207                if (_str[_index] == '}') {
208                    // report the text w/ a single } at the end
209                    _index++;
210                    if (_index == _str.Length || _str[_index] != '}') {
211                        throw PythonOps.ValueError("Single '}}' encountered in format string");
212                    }
213
214                    text = _str.Substring(lastTextStart, _index - lastTextStart);
215                    _index++;
216                    return true;
217                } else if (_index == _str.Length - 1) {
218                    throw PythonOps.ValueError("Single '{{' encountered in format string");
219                } else if (_str[_index + 1] == '{') {
220                    // report the text w/ a single { at the end
221                    text = _str.Substring(lastTextStart, ++_index - lastTextStart);
222                    _index++;
223                    return true;
224                } else {
225                    // yield the text
226                    text = _str.Substring(lastTextStart, _index++ - lastTextStart);
227                    return false;
228                }
229            }
230
231            /// <summary>
232            /// Parses the conversion character and returns it
233            /// </summary>
234            private char ParseConversion() {
235                _index++; // eat the !
236                if (CheckEnd()) {
237                    throw PythonOps.ValueError("end of format while looking for conversion specifier");
238                }
239
240                return _str[_index++];
241            }
242
243            /// <summary>
244            /// Checks to see if we're at the end of the format.  If there's no more characters left we report 
245            /// the error, otherwise if we hit a } we return true to indicate parsing should stop.
246            /// </summary>
247            private bool CheckEnd() {
248                if (_index == _str.Length) {
249                    throw PythonOps.ValueError("unmatched '{{' in format");
250                } else if (_str[_index] == '}') {
251                    _index++;
252                    return true;
253                }
254
255                return false;
256            }
257
258            /// <summary>
259            /// Parses the format spec string and returns it.
260            /// </summary>
261            private string/*!*/ ParseFormatSpec(ref int depth) {
262                _index++; // eat the :
263                return ParseFieldOrSpecWorker(_brackets, ref depth);
264            }
265
266            /// <summary>
267            /// Parses the field name and returns it.
268            /// </summary>
269            private string/*!*/ ParseFieldName(ref int depth) {
270                return ParseFieldOrSpecWorker(_fieldNameEnd, ref depth);
271            }
272
273            /// <summary>
274            /// Handles parsing the field name and the format spec and returns it.  At the parse
275            /// level these are basically the same - field names just have more terminating characters.
276            /// 
277            /// The most complex part of parsing them is they both allow nested braces and require
278            /// the braces are matched.  Strangely though the braces need to be matched across the
279            /// combined field and format spec - not within each format.
280            /// </summary>
281            private string/*!*/ ParseFieldOrSpecWorker(char[]/*!*/ ends, ref int depth) {
282                int end = _index - 1;
283                bool done = false;
284                do {
285                    end = _str.IndexOfAny(ends, end + 1);
286
287                    if (end == -1) {
288                        throw PythonOps.ValueError("unmatched '{{' in format");
289                    }
290
291                    switch (_str[end]) {
292                        case '{': depth++; break;
293                        case '}': depth--; break;
294                        default: done = true; break;
295                    }
296                } while (!done && depth != 0);
297
298                string res = _str.Substring(_index, end - _index);
299                _index = end;
300                return res;
301            }
302        }
303
304        #endregion
305
306        #region String Formatter
307
308        /// <summary>
309        /// Provides the built-in string formatter which is exposed to Python via the str.format API.
310        /// </summary>
311        private class Formatter {
312            private readonly PythonContext/*!*/ _context;
313            private readonly PythonTuple/*!*/ _args;
314            private readonly IDictionary<object, object>/*!*/ _kwArgs;
315            private readonly int _depth;
316            private int _autoNumberedIndex;
317
318            private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth)
319                : this(context, args, kwArgs) {
320                _depth = depth;
321            }
322
323            private Formatter(PythonContext/*!*/ context, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
324                Assert.NotNull(context, args, kwArgs);
325
326                _context = context;
327                _args = args;
328                _kwArgs = kwArgs;
329            }
330
331            public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs) {
332                Assert.NotNull(context, args, kwArgs, format);
333
334                return new Formatter(context, args, kwArgs).ReplaceText(format);
335            }
336
337            public static string/*!*/ FormatString(PythonContext/*!*/ context, string/*!*/ format, PythonTuple/*!*/ args, IDictionary<object, object>/*!*/ kwArgs, int depth) {
338                Assert.NotNull(context, args, kwArgs, format);
339
340                if (depth == 2) {
341                    throw PythonOps.ValueError("Max string recursion exceeded");
342                }
343
344                return new Formatter(context, args, kwArgs, depth).ReplaceText(format);
345            }
346
347            private string ReplaceText(string format) {
348                StringBuilder builder = new StringBuilder();
349
350                foreach (PythonTuple pt in StringFormatParser.Parse(format)) {
351                    string text = (string)pt[0];
352                    string fieldName = (string)pt[1];
353                    string formatSpec = (string)pt[2];
354                    string conversionStr = (string)pt[3];
355                    char? conversion = conversionStr != null && conversionStr.Length > 0 ? conversionStr[0] : (char?)null;
356
357                    builder.Append(text);
358
359                    if (fieldName != null) {
360                        // get the argument value
361                        object argValue = GetArgumentValue(ParseFieldName(fieldName, true));
362
363                        // apply the conversion
364                        argValue = ApplyConversion(conversion, argValue);
365
366                        // handle computed format specifiers
367                        formatSpec = ReplaceComputedFormats(formatSpec);
368
369                        // append the string
370                        builder.Append(Builtin.format(_context.SharedContext, argValue, formatSpec));
371                    }
372                }
373
374                return builder.ToString();
375            }
376
377            /// <summary>
378            /// Inspects a format spec to see if it contains nested format specs which
379            /// we need to compute.  If so runs another string formatter on the format
380            /// spec to compute those values.
381            /// </summary>
382            private string/*!*/ ReplaceComputedFormats(string/*!*/ formatSpec) {
383                int computeStart = formatSpec.IndexOf('{');
384                if (computeStart != -1) {
385                    formatSpec = FormatString(
386                        _context,
387                        formatSpec,
388                        _args,
389                        _kwArgs,
390                        _depth + 1
391                    );
392                }
393                return formatSpec;
394            }
395
396            /// <summary>
397            /// Given the field name gets the object from our arguments running
398            /// any of the member/index accessors.
399            /// </summary>
400            private object GetArgumentValue(FieldName fieldName) {
401                return DoAccessors(fieldName, GetUnaccessedObject(fieldName));
402            }
403
404            /// <summary>
405            /// Applies the known built-in conversions to the object if a conversion is
406            /// specified.
407            /// </summary>
408            private object ApplyConversion(char? conversion, object argValue) {
409                switch (conversion) {
410                    case 'r':
411                        argValue = PythonOps.Repr(_context.SharedContext, argValue);
412                        break;
413                    case 's':
414                        argValue = PythonOps.ToString(_context.SharedContext, argValue);
415                        break;
416                    case null:
417                        // no conversion specified
418                        break;
419                    default:
420                        throw PythonOps.ValueError("Unknown conversion specifier {0}", conversion.Value);
421                }
422
423                return argValue;
424            }
425
426            /// <summary>
427            /// Gets the initial object represented by the field name - e.g. the 0 or
428            /// keyword name.
429            /// </summary>
430            private object GetUnaccessedObject(FieldName fieldName) {
431                int argIndex;
432                object argValue;
433
434                // get the object
435                if (fieldName.ArgumentName.Length == 0) {
436                    // auto-numbering of format specifiers
437                    if (_autoNumberedIndex == -1) {
438                        throw PythonOps.ValueError("cannot switch from manual field specification to automatic field numbering");
439                    }
440                    argValue = _args[_autoNumberedIndex++];
441                } else if (Int32.TryParse(fieldName.ArgumentName, out argIndex)) {
442                    if (_autoNumberedIndex > 0) {
443                        throw PythonOps.ValueError("cannot switch from automatic field numbering to manual field specification");
444                    }
445                    _autoNumberedIndex = -1;
446                    argValue = _args[argIndex];
447                } else {
448                    argValue = _kwArgs[fieldName.ArgumentName];
449                }
450
451                return argValue;
452            }
453
454            /// <summary>
455            /// Given the object value runs the accessors in the field name (if any) against the object.
456            /// </summary>
457            private object DoAccessors(FieldName fieldName, object argValue) {
458                foreach (FieldAccessor accessor in fieldName.Accessors) {
459                    // then do any accesses against the object
460                    int intVal;
461                    if (accessor.IsField) {
462                        argValue = PythonOps.GetBoundAttr(
463                            _context.SharedContext,
464                            argValue,
465                            accessor.AttributeName
466                        );
467                    } else if (Int32.TryParse(accessor.AttributeName, out intVal)) {
468                        argValue = PythonOps.GetIndex(
469                            _context.SharedContext,
470                            argValue,
471                            ScriptingRuntimeHelpers.Int32ToObject(intVal)
472                        );
473                    } else {
474                        argValue = PythonOps.GetIndex(
475                            _context.SharedContext,
476                            argValue,
477                            accessor.AttributeName
478                        );
479                    }
480                }
481
482                return argValue;
483            }
484
485        }
486
487        #endregion
488
489        #region Parser helper functions
490
491        /// <summary>
492        /// Parses the field name including attribute access or element indexing.
493        /// </summary>
494        private static FieldName ParseFieldName(string/*!*/ str, bool reportErrors) {
495            // (identifier | integer) ("." attribute_name | "[" element_index "]")*
496            int index = 0;
497            string arg = ParseIdentifier(str, false, ref index);
498
499            return new FieldName(arg, ParseFieldAccessors(str, index, reportErrors));
500        }
501
502        /// <summary>
503        /// Parses the field name including attribute access or element indexing.
504        /// </summary>
505        private static IEnumerable<FieldAccessor> ParseFieldAccessors(string/*!*/ str, int index, bool reportErrors) {
506            // (identifier | integer) ("." attribute_name | "[" element_index "]")*
507            while (index != str.Length && str[index] != '}') {
508                char accessType = str[index];
509
510                if (accessType == '.' || accessType == '[') {
511                    index++;
512                    bool isIndex = accessType == '[';
513                    string identifier = ParseIdentifier(str, isIndex, ref index);
514
515                    if (isIndex) {
516                        if (index == str.Length || str[index] != ']') {
517                            throw PythonOps.ValueError("Missing ']' in format string");
518                        }
519
520                        index++;
521                    }
522
523                    if (identifier.Length == 0) {
524                        throw PythonOps.ValueError("Empty attribute in format string");
525                    }
526
527                    yield return new FieldAccessor(identifier, !isIndex);
528                } else {
529                    if (reportErrors) {
530                        throw PythonOps.ValueError("Only '.' and '[' are valid in format field specifier, got {0}", accessType);
531                    } else {
532                        break;
533                    }
534                }
535            }
536        }
537
538        /// <summary>
539        /// Converts accessors from our internal structure into a PythonTuple matching how CPython
540        /// exposes these
541        /// </summary>
542        private static IEnumerable<PythonTuple> AccessorsToPython(IEnumerable<FieldAccessor> accessors) {
543            foreach (FieldAccessor accessor in accessors) {
544
545                int val;
546                object attrName = accessor.AttributeName;
547                if (Int32.TryParse(accessor.AttributeName, out val)) {
548                    attrName = ScriptingRuntimeHelpers.Int32ToObject(val);
549                }
550
551                yield return PythonTuple.MakeTuple(
552                    ScriptingRuntimeHelpers.BooleanToObject(accessor.IsField),
553                    attrName
554                );
555            }
556        }
557
558        /// <summary>
559        /// Parses an identifier and returns it
560        /// </summary>
561        private static string/*!*/ ParseIdentifier(string/*!*/ str, bool isIndex, ref int index) {
562            int start = index;
563            while (index < str.Length && str[index] != '.' && (isIndex || str[index] != '[') && (!isIndex || str[index] != ']')) {
564                index++;
565            }
566
567            return str.Substring(start, index - start);
568        }
569
570        /// <summary>
571        /// Encodes all the information about the field name.
572        /// </summary>
573        private struct FieldName {
574            public readonly string/*!*/ ArgumentName;
575            public readonly IEnumerable<FieldAccessor>/*!*/ Accessors;
576
577            public FieldName(string/*!*/ argumentName, IEnumerable<FieldAccessor>/*!*/ accessors) {
578
579                Assert.NotNull(argumentName, accessors);
580
581                ArgumentName = argumentName;
582                Accessors = accessors;
583            }
584        }
585
586        /// <summary>
587        /// Encodes a single field accessor (.b or [number] or [str])
588        /// </summary>
589        private struct FieldAccessor {
590            public readonly string AttributeName;
591            public readonly bool IsField;
592
593            public FieldAccessor(string attributeName, bool isField) {
594                AttributeName = attributeName;
595                IsField = isField;
596            }
597        }
598
599        #endregion
600    }
601}