PageRenderTime 44ms CodeModel.GetById 16ms app.highlight 22ms RepoModel.GetById 2ms app.codeStats 0ms

/Lib/string.py

http://unladen-swallow.googlecode.com/
Python | 642 lines | 634 code | 3 blank | 5 comment | 0 complexity | 32b5fc50031f44244e0f4b74a80c971b MD5 | raw file
  1"""A collection of string operations (most are no longer used).
  2
  3Warning: most of the code you see here isn't normally used nowadays.
  4Beginning with Python 1.6, many of these functions are implemented as
  5methods on the standard string object. They used to be implemented by
  6a built-in module called strop, but strop is now obsolete itself.
  7
  8Public module variables:
  9
 10whitespace -- a string containing all characters considered whitespace
 11lowercase -- a string containing all characters considered lowercase letters
 12uppercase -- a string containing all characters considered uppercase letters
 13letters -- a string containing all characters considered letters
 14digits -- a string containing all characters considered decimal digits
 15hexdigits -- a string containing all characters considered hexadecimal digits
 16octdigits -- a string containing all characters considered octal digits
 17punctuation -- a string containing all characters considered punctuation
 18printable -- a string containing all characters considered printable
 19
 20"""
 21
# Some strings for ctype-style character classification
# NOTE: whitespace, lowercase, uppercase and letters may be rebound near the
# end of this module if the optional "strop" module can be imported; the
# ascii_* names, hexdigits and printable are computed here from the values
# below and are not recomputed afterwards.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
# The ascii_* names are bound to the same objects as the plain names above.
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
# Both lower case and upper case hexadecimal letters are included.
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
# Concatenation order: digits, letters, punctuation, then whitespace.
printable = digits + letters + punctuation + whitespace
 35
 36# Case conversion helpers
 37# Use str to convert Unicode literal in case of -U
 38l = map(chr, xrange(256))
 39_idmap = str('').join(l)
 40del l
 41
 42# Functions which aren't available as string methods.
 43
 44# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
 45def capwords(s, sep=None):
 46    """capwords(s [,sep]) -> string
 47
 48    Split the argument into words using split, capitalize each
 49    word using capitalize, and join the capitalized words using
 50    join.  If the optional second argument sep is absent or None,
 51    runs of whitespace characters are replaced by a single space
 52    and leading and trailing whitespace are removed, otherwise
 53    sep is used to split and join the words.
 54
 55    """
 56    return (sep or ' ').join(x.capitalize() for x in s.split(sep))
 57
 58
 59# Construct a translation string
 60_idmapL = None
 61def maketrans(fromstr, tostr):
 62    """maketrans(frm, to) -> string
 63
 64    Return a translation table (a string of 256 bytes long)
 65    suitable for use in string.translate.  The strings frm and to
 66    must be of the same length.
 67
 68    """
 69    if len(fromstr) != len(tostr):
 70        raise ValueError, "maketrans arguments must have same length"
 71    global _idmapL
 72    if not _idmapL:
 73        _idmapL = list(_idmap)
 74    L = _idmapL[:]
 75    fromstr = map(ord, fromstr)
 76    for i in range(len(fromstr)):
 77        L[fromstr[i]] = tostr[i]
 78    return ''.join(L)
 79
 80
 81
 82####################################################################
 83import re as _re
 84
 85class _multimap:
 86    """Helper class for combining multiple mappings.
 87
 88    Used by .{safe_,}substitute() to combine the mapping and keyword
 89    arguments.
 90    """
 91    def __init__(self, primary, secondary):
 92        self._primary = primary
 93        self._secondary = secondary
 94
 95    def __getitem__(self, key):
 96        try:
 97            return self._primary[key]
 98        except KeyError:
 99            return self._secondary[key]
100
101
102class _TemplateMetaclass(type):
103    pattern = r"""
104    %(delim)s(?:
105      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
106      (?P<named>%(id)s)      |   # delimiter and a Python identifier
107      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
108      (?P<invalid>)              # Other ill-formed delimiter exprs
109    )
110    """
111
112    def __init__(cls, name, bases, dct):
113        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
114        if 'pattern' in dct:
115            pattern = cls.pattern
116        else:
117            pattern = _TemplateMetaclass.pattern % {
118                'delim' : _re.escape(cls.delimiter),
119                'id'    : cls.idpattern,
120                }
121        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
122
123
class Template:
    """A string class for supporting $-substitutions.

    The class attributes delimiter and idpattern are read by the
    metaclass, which stores a compiled regular expression in the
    class attribute pattern.  That expression is expected to define
    the groups 'named', 'braced', 'escaped' and 'invalid'.
    """
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        # Report an ill-formed placeholder as a ValueError carrying the
        # 1-based line and column derived from the start of the match's
        # 'invalid' group.
        offset = mo.start('invalid')
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            # Column = offset minus the length of all complete lines
            # before the one containing the placeholder.
            colno = offset - len(''.join(preceding[:-1]))
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """substitute([mapping] [,**kws]) -> string

        Return the template with every placeholder replaced by the
        value found under its name.  Values come from the optional
        positional mapping and/or the keyword arguments; when both are
        given the keyword arguments are consulted first.  A doubled
        delimiter yields a single delimiter.

        Raises TypeError for more than one positional argument,
        ValueError for an ill-formed placeholder, and lets the lookup
        error of the mapping (e.g. KeyError) propagate for a missing
        name.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            name = mo.group('named') or mo.group('braced')
            if name is not None:
                # '%s' % (value,) is used rather than str() because the
                # latter fails on Unicode containing non-ASCII characters.
                return '%s' % (mapping[name],)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """safe_substitute([mapping] [,**kws]) -> string

        Like substitute(), except that a placeholder whose lookup
        raises KeyError is left in the result unchanged and an
        ill-formed placeholder yields the bare delimiter instead of
        raising ValueError.  Raises TypeError for more than one
        positional argument.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Helper function for .sub()
        def convert(mo):
            plain = mo.group('named')
            if plain is not None:
                try:
                    # '%s' % (value,) is used rather than str() because
                    # the latter fails on Unicode containing non-ASCII
                    # characters.
                    return '%s' % (mapping[plain],)
                except KeyError:
                    return self.delimiter + plain
            wrapped = mo.group('braced')
            if wrapped is not None:
                try:
                    return '%s' % (mapping[wrapped],)
                except KeyError:
                    return self.delimiter + '{' + wrapped + '}'
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
206
207
208
209####################################################################
210# NOTE: Everything below here is deprecated.  Use string methods instead.
211# This stuff will go away in Python 3.0.
212
# Backward compatible names for exceptions
# All four legacy names are bound to the builtin ValueError.
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError
218
# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a copy of s in which the upper
    case letters have been converted to lower case.

    """
    lowered = s.lower()
    return lowered
227
# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): a copy of s in which the lower
    case letters have been converted to upper case.

    """
    raised = s.upper()
    return raised
236
# Exchange lower case letters and UPPER CASE letters
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which upper
    case characters become lower case and lower case characters
    become upper case.

    """
    swapped = s.swapcase()
    return swapped
246
# Strip leading and trailing whitespace (or the given characters)
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return s.strip(chars): a copy of s without its leading and
    trailing whitespace.
    If chars is given and not None, the characters in chars are
    removed instead of whitespace.
    If chars is unicode, s will be converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
258
# Strip leading whitespace (or the given characters)
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return s.lstrip(chars): a copy of s without its leading
    whitespace.
    If chars is given and not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.lstrip(chars)
    return stripped
268
# Strip trailing whitespace (or the given characters)
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return s.rstrip(chars): a copy of s without its trailing
    whitespace.
    If chars is given and not None, the characters in chars are
    removed instead of whitespace.

    """
    stripped = s.rstrip(chars)
    return stripped
278
279
# Split a string into a list of words
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return s.split(sep, maxsplit): the list of words in s, with sep
    as the delimiter string.  If maxsplit is given, at most maxsplit
    splits are made (so the list has at most maxsplit+1 items).  If
    sep is not specified or is None, any whitespace string is a
    separator.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words
splitfields = split
294
# Split a string into a list of words, working from the right
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Return s.rsplit(sep, maxsplit): the list of words in s, with sep
    as the delimiter string, splitting from the end of the string
    towards the front.  If maxsplit is given, at most maxsplit splits
    are made.  If sep is not specified or is None, any whitespace
    string is a separator.
    """
    words = s.rsplit(sep, maxsplit)
    return words
306
# Join words with an optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): a string made of the items of list with
    sep placed between consecutive items.  The default separator is
    a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
320
# Locate a substring; raise an exception if it is absent
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Return s.index(sub [,start [,end]]).  Like find, but ValueError
    is raised when the substring is not found.

    """
    position = s.index(*args)
    return position
329
# Locate the last occurrence of a substring; raise an exception if absent
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Return s.rindex(sub [,start [,end]]).  Like rfind, but ValueError
    is raised when the substring is not found.

    """
    position = s.rindex(*args)
    return position
338
# Count non-overlapping occurrences of a substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return s.count(sub[, start[, end]]): the number of occurrences
    of substring sub in s[start:end].  The optional arguments start
    and end are interpreted as in slice notation.

    """
    occurrences = s.count(*args)
    return occurrences
349
# Locate a substring; the result is -1 if it is absent
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return s.find(sub [,start [,end]]): the lowest index in s where
    substring sub is found, such that sub is contained within
    s[start:end].  The optional arguments start and end are
    interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.find(*args)
    return position
362
# Locate the last occurrence of a substring; the result is -1 if absent
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return s.rfind(sub [,start [,end]]): the highest index in s
    where substring sub is found, such that sub is contained within
    s[start:end].  The optional arguments start and end are
    interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.rfind(*args)
    return position
375
# for a bit of speed
# Module-level aliases of the builtin conversion types; atof(), atoi() and
# atol() below call these names.
_float = float
_int = int
_long = long
380
# Convert a string to a float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s,
    as computed by float(s).

    """
    number = _float(s)
    return number
389
390
# Convert a string to an integer
def atoi(s , base=10):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, as computed by int(s, base).  The base defaults to 10.
    The string s must consist of one or more digits, possibly
    preceded by a sign.  If base is 0, it is chosen from the leading
    characters of s, 0 for octal, 0x or 0X for hexadecimal.  If base
    is 16, a preceding 0x or 0X is accepted.

    """
    number = _int(s, base)
    return number
404
405
# Convert a string to a long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, as computed by long(s, base).  The base defaults to
    10.  The string s must consist of one or more digits, possibly
    preceded by a sign.  If base is 0, it is chosen from the leading
    characters of s, 0 for octal, 0x or 0X for hexadecimal.  If base
    is 16, a preceding 0x or 0X is accepted.  A trailing L or l is
    not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
420
421
# Left-justify a string within a field
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Return s.ljust(width[, fillchar]): s left-justified in a field
    of the given width, padded with spaces as needed.  The string is
    never truncated.  If fillchar is given, it is used for padding
    instead of spaces.

    """
    padded = s.ljust(width, *args)
    return padded
432
# Right-justify a string within a field
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Return s.rjust(width[, fillchar]): s right-justified in a field
    of the given width, padded with spaces as needed.  The string is
    never truncated.  If fillchar is given, it is used for padding
    instead of spaces.

    """
    padded = s.rjust(width, *args)
    return padded
443
# Center a string within a field
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Return s.center(width[, fillchar]): s centered in a field of the
    given width, padded with spaces as needed.  The string is never
    truncated.  If fillchar is given, it is used for padding instead
    of spaces.

    """
    padded = s.center(width, *args)
    return padded
454
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  An
    argument that is not a string is first converted with repr().

    """
    text = x if isinstance(x, basestring) else repr(x)
    return text.zfill(width)
468
# Expand the tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return s.expandtabs(tabsize): a copy of s in which every tab
    character is replaced by the appropriate number of spaces,
    depending on the current column and on tabsize (default 8).

    """
    expanded = s.expandtabs(tabsize)
    return expanded
480
# Translate characters through a look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletions are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.  The
    deletions argument is not allowed for Unicode strings.

    """
    if not deletions and table is not None:
        # Add s[:0] so that if s is Unicode and table is an 8-bit string,
        # table is converted to Unicode.  This means that table *cannot*
        # be a dictionary -- for that feature, use u.translate() directly.
        return s.translate(table + s[:0])
    # Deletions were requested, or table is None: hand both arguments
    # to the translate() method of s.
    return s.translate(table, deletions)
499
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return s.capitalize(): a copy of s with only its first character
    capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
509
# Replace occurrences of a substring
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Return s.replace(old, new, maxsplit): a copy of string str with
    all occurrences of substring old replaced by new.  If the
    optional argument maxsplit is given, only the first maxsplit
    occurrences are replaced.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
520
521
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    pass                                          # Use the original versions
else:
    # Keep letters in step with the freshly imported case strings.
    letters = lowercase + uppercase
532
533########################################################################
534# the Formatter class
535# see PEP 3101 for details and purpose of this class
536
537# The hard parts are reused from the C implementation.  They're exposed as "_"
538# prefixed methods of str and unicode.
539
540# The overall parser is implemented in str._formatter_parser.
541# The field name parser is implemented in str._formatter_field_name_split
542
class Formatter(object):
    """Python-level driver for PEP 3101 style string formatting.

    The format string is split by parse() into literal text and
    replacement fields; each field is resolved with get_field(),
    converted with convert_field() and rendered with format_field().
    Every one of these steps is an ordinary method so that subclasses
    can override it.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional and keyword
        arguments; a thin wrapper around vformat()."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string, taking the arguments as a sequence
        (args) and a mapping (kwargs) instead of unpacked.

        After formatting, check_unused_args() is called with the set
        of argument keys that were actually referenced.
        """
        used_args = set()
        # 2 is the initial recursion budget handed to _vformat.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        # Worker for vformat().  It calls itself with recursion_depth - 1
        # to expand each format spec (which may contain replacement
        # fields of its own); a negative recursion_depth raises
        # ValueError.  used_args is updated in place with the key of
        # every argument referenced.
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)


    def get_value(self, key, args, kwargs):
        """Return the argument selected by key: args[key] when key is
        an int or long, kwargs[key] otherwise."""
        if isinstance(key, (int, long)):
            return args[key]
        else:
            return kwargs[key]


    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing
        here.  used_args is the set of argument keys that were
        referenced by the format string."""
        pass


    def format_field(self, value, format_spec):
        """Return value rendered by the builtin format() with
        format_spec."""
        return format(value, format_spec)


    def convert_field(self, value, conversion):
        """Apply a conversion to value: 'r' gives repr(value), 's'
        gives str(value) and None returns value unchanged.  Any other
        conversion raises ValueError."""
        # do any conversion on the resulting object
        if conversion == 'r':
            return repr(value)
        elif conversion == 's':
            return str(value)
        elif conversion is None:
            return value
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))


    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal_text, field_name,
        format_spec, conversion) tuples by delegating to its
        _formatter_parser() method."""
        return format_string._formatter_parser()


    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object.

        Returns the tuple (obj, first), where first is the leading
        key of field_name as passed to get_value() and obj is the
        result of applying the remaining attribute and item lookups.
        """
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first