PageRenderTime 406ms CodeModel.GetById 81ms app.highlight 110ms RepoModel.GetById 203ms app.codeStats 0ms

/mysql_watcher/indra/base/llsd.py

https://bitbucket.org/lindenlab/apiary/
Python | 1052 lines | 993 code | 10 blank | 49 comment | 5 complexity | e2733d1d09a28fdd1f5da1363e247fa9 MD5 | raw file
   1"""\
   2@file llsd.py
   3@brief Types as well as parsing and formatting functions for handling LLSD.
   4
   5$LicenseInfo:firstyear=2006&license=mit$
   6
   7Copyright (c) 2006-2009, Linden Research, Inc.
   8
   9Permission is hereby granted, free of charge, to any person obtaining a copy
  10of this software and associated documentation files (the "Software"), to deal
  11in the Software without restriction, including without limitation the rights
  12to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  13copies of the Software, and to permit persons to whom the Software is
  14furnished to do so, subject to the following conditions:
  15
  16The above copyright notice and this permission notice shall be included in
  17all copies or substantial portions of the Software.
  18
  19THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  25THE SOFTWARE.
  26$/LicenseInfo$
  27"""
  28
  29import datetime
  30import base64
  31import string
  32import struct
  33import time
  34import types
  35import re
  36
  37from indra.util.fastest_elementtree import ElementTreeError, fromstring
  38from indra.base import lluuid
  39
  40# cllsd.c in server/server-1.25 has memory leaks,
  41#   so disabling cllsd for now
  42#try:
  43#    import cllsd
  44#except ImportError:
  45#    cllsd = None
  46cllsd = None
  47
  48int_regex = re.compile(r"[-+]?\d+")
  49real_regex = re.compile(r"[-+]?(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?")
  50alpha_regex = re.compile(r"[a-zA-Z]+")
  51date_regex = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T"
  52                        r"(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})"
  53                        r"(?P<second_float>(\.\d+)?)Z")
  54#date: d"YYYY-MM-DDTHH:MM:SS.FFFFFFZ"
  55
  56class LLSDParseError(Exception):
  57    pass
  58
  59class LLSDSerializationError(TypeError):
  60    pass
  61
  62
  63class binary(str):
  64    pass
  65
  66class uri(str):
  67    pass
  68
  69
  70BOOL_TRUE = ('1', '1.0', 'true')
  71BOOL_FALSE = ('0', '0.0', 'false', '')
  72
  73
  74def format_datestr(v):
  75    """ Formats a datetime or date object into the string format shared by xml and notation serializations."""
  76    if hasattr(v, 'microsecond'):
  77        return v.isoformat() + 'Z'
  78    else:
  79        return v.strftime('%Y-%m-%dT%H:%M:%SZ')
  80
  81def parse_datestr(datestr):
  82    """Parses a datetime object from the string format shared by xml and notation serializations."""
  83    if datestr == "":
  84        return datetime.datetime(1970, 1, 1)
  85    
  86    match = re.match(date_regex, datestr)
  87    if not match:
  88        raise LLSDParseError("invalid date string '%s'." % datestr)
  89    
  90    year = int(match.group('year'))
  91    month = int(match.group('month'))
  92    day = int(match.group('day'))
  93    hour = int(match.group('hour'))
  94    minute = int(match.group('minute'))
  95    second = int(match.group('second'))
  96    seconds_float = match.group('second_float')
  97    microsecond = 0
  98    if seconds_float:
  99        microsecond = int(float('0' + seconds_float) * 1e6)
 100    return datetime.datetime(year, month, day, hour, minute, second, microsecond)
 101
 102
 103def bool_to_python(node):
 104    val = node.text or ''
 105    if val in BOOL_TRUE:
 106        return True
 107    else:
 108        return False
 109
 110def int_to_python(node):
 111    val = node.text or ''
 112    if not val.strip():
 113        return 0
 114    return int(val)
 115
 116def real_to_python(node):
 117    val = node.text or ''
 118    if not val.strip():
 119        return 0.0
 120    return float(val)
 121
 122def uuid_to_python(node):
 123    return lluuid.UUID(node.text)
 124
 125def str_to_python(node):
 126    return node.text or ''
 127
 128def bin_to_python(node):
 129    return binary(base64.decodestring(node.text or ''))
 130
 131def date_to_python(node):
 132    val = node.text or ''
 133    if not val:
 134        val = "1970-01-01T00:00:00Z"
 135    return parse_datestr(val)
 136    
 137
 138def uri_to_python(node):
 139    val = node.text or ''
 140    if not val:
 141        return None
 142    return uri(val)
 143
 144def map_to_python(node):
 145    result = {}
 146    for index in range(len(node))[::2]:
 147        result[node[index].text] = to_python(node[index+1])
 148    return result
 149
 150def array_to_python(node):
 151    return [to_python(child) for child in node]
 152
 153
 154NODE_HANDLERS = dict(
 155    undef=lambda x: None,
 156    boolean=bool_to_python,
 157    integer=int_to_python,
 158    real=real_to_python,
 159    uuid=uuid_to_python,
 160    string=str_to_python,
 161    binary=bin_to_python,
 162    date=date_to_python,
 163    uri=uri_to_python,
 164    map=map_to_python,
 165    array=array_to_python,
 166    )
 167
 168def to_python(node):
 169    return NODE_HANDLERS[node.tag](node)
 170
 171class Nothing(object):
 172    pass
 173
 174
 175class LLSDXMLFormatter(object):
 176    def __init__(self):
 177        self.type_map = {
 178            type(None) : self.UNDEF,
 179            bool : self.BOOLEAN,
 180            int : self.INTEGER,
 181            long : self.INTEGER,
 182            float : self.REAL,
 183            lluuid.UUID : self.UUID,
 184            binary : self.BINARY,
 185            str : self.STRING,
 186            unicode : self.STRING,
 187            uri : self.URI,
 188            datetime.datetime : self.DATE,
 189            datetime.date : self.DATE,
 190            list : self.ARRAY,
 191            tuple : self.ARRAY,
 192            types.GeneratorType : self.ARRAY,
 193            dict : self.MAP,
 194            LLSD : self.LLSD
 195        }
 196
 197    def elt(self, name, contents=None):
 198        if(contents is None or contents is ''):
 199            return "<%s />" % (name,)
 200        else:
 201            if type(contents) is unicode:
 202                contents = contents.encode('utf-8')
 203            return "<%s>%s</%s>" % (name, contents, name)
 204
 205    def xml_esc(self, v):
 206        if type(v) is unicode:
 207            v = v.encode('utf-8')
 208        return v.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 209
 210    def LLSD(self, v):
 211        return self.generate(v.thing)
 212    def UNDEF(self, v):
 213        return self.elt('undef')
 214    def BOOLEAN(self, v):
 215        if v:
 216            return self.elt('boolean', 'true')
 217        else:
 218            return self.elt('boolean', 'false')
 219    def INTEGER(self, v):
 220        return self.elt('integer', v)
 221    def REAL(self, v):
 222        return self.elt('real', v)
 223    def UUID(self, v):
 224        if(v.isNull()):
 225            return self.elt('uuid')
 226        else:
 227            return self.elt('uuid', v)
 228    def BINARY(self, v):
 229        return self.elt('binary', base64.encodestring(v))
 230    def STRING(self, v):
 231        return self.elt('string', self.xml_esc(v))
 232    def URI(self, v):
 233        return self.elt('uri', self.xml_esc(str(v)))
 234    def DATE(self, v):
 235        return self.elt('date', format_datestr(v))
 236    def ARRAY(self, v):
 237        return self.elt('array', ''.join([self.generate(item) for item in v]))
 238    def MAP(self, v):
 239        return self.elt(
 240            'map',
 241            ''.join(["%s%s" % (self.elt('key', self.xml_esc(str(key))), self.generate(value))
 242             for key, value in v.items()]))
 243
 244    typeof = type
 245    def generate(self, something):
 246        t = self.typeof(something)
 247        if self.type_map.has_key(t):
 248            return self.type_map[t](something)
 249        else:
 250            raise LLSDSerializationError("Cannot serialize unknown type: %s (%s)" % (
 251                t, something))
 252
 253    def _format(self, something):
 254        return '<?xml version="1.0" ?>' + self.elt("llsd", self.generate(something))
 255
 256    def format(self, something):
 257        if cllsd:
 258            return cllsd.llsd_to_xml(something)
 259        return self._format(something)
 260
 261_g_xml_formatter = None
 262def format_xml(something):
 263    global _g_xml_formatter
 264    if _g_xml_formatter is None:
 265        _g_xml_formatter = LLSDXMLFormatter()
 266    return _g_xml_formatter.format(something)
 267
 268class LLSDXMLPrettyFormatter(LLSDXMLFormatter):
 269    def __init__(self, indent_atom = None):
 270        # Call the super class constructor so that we have the type map
 271        super(LLSDXMLPrettyFormatter, self).__init__()
 272
 273        # Override the type map to use our specialized formatters to
 274        # emit the pretty output.
 275        self.type_map[list] = self.PRETTY_ARRAY
 276        self.type_map[tuple] = self.PRETTY_ARRAY
 277        self.type_map[types.GeneratorType] = self.PRETTY_ARRAY,
 278        self.type_map[dict] = self.PRETTY_MAP
 279
 280        # Private data used for indentation.
 281        self._indent_level = 1
 282        if indent_atom is None:
 283            self._indent_atom = '  '
 284        else:
 285            self._indent_atom = indent_atom
 286
 287    def _indent(self):
 288        "Return an indentation based on the atom and indentation level."
 289        return self._indent_atom * self._indent_level
 290
 291    def PRETTY_ARRAY(self, v):
 292        rv = []
 293        rv.append('<array>\n')
 294        self._indent_level = self._indent_level + 1
 295        rv.extend(["%s%s\n" %
 296                   (self._indent(),
 297                    self.generate(item))
 298                   for item in v])
 299        self._indent_level = self._indent_level - 1
 300        rv.append(self._indent())
 301        rv.append('</array>')
 302        return ''.join(rv)
 303
 304    def PRETTY_MAP(self, v):
 305        rv = []
 306        rv.append('<map>\n')
 307        self._indent_level = self._indent_level + 1
 308        keys = v.keys()
 309        keys.sort()
 310        rv.extend(["%s%s\n%s%s\n" %
 311                   (self._indent(),
 312                    self.elt('key', key),
 313                    self._indent(),
 314                    self.generate(v[key]))
 315                   for key in keys])
 316        self._indent_level = self._indent_level - 1
 317        rv.append(self._indent())
 318        rv.append('</map>')
 319        return ''.join(rv)
 320
 321    def format(self, something):
 322        data = []
 323        data.append('<?xml version="1.0" ?>\n<llsd>')
 324        data.append(self.generate(something))
 325        data.append('</llsd>\n')
 326        return '\n'.join(data)
 327
 328def format_pretty_xml(something):
 329    """@brief Serialize a python object as 'pretty' llsd xml.
 330
 331    The output conforms to the LLSD DTD, unlike the output from the
 332    standard python xml.dom DOM::toprettyxml() method which does not
 333    preserve significant whitespace. 
 334    This function is not necessarily suited for serializing very large
 335    objects. It is not optimized by the cllsd module, and sorts on
 336    dict (llsd map) keys alphabetically to ease human reading.
 337    """
 338    return LLSDXMLPrettyFormatter().format(something)
 339
 340class LLSDNotationFormatter(object):
 341    def __init__(self):
 342        self.type_map = {
 343            type(None) : self.UNDEF,
 344            bool : self.BOOLEAN,
 345            int : self.INTEGER,
 346            long : self.INTEGER,
 347            float : self.REAL,
 348            lluuid.UUID : self.UUID,
 349            binary : self.BINARY,
 350            str : self.STRING,
 351            unicode : self.STRING,
 352            uri : self.URI,
 353            datetime.datetime : self.DATE,
 354            datetime.date : self.DATE,
 355            list : self.ARRAY,
 356            tuple : self.ARRAY,
 357            types.GeneratorType : self.ARRAY,
 358            dict : self.MAP,
 359            LLSD : self.LLSD
 360        }
 361
 362    def LLSD(self, v):
 363        return self.generate(v.thing)
 364    def UNDEF(self, v):
 365        return '!'
 366    def BOOLEAN(self, v):
 367        if v:
 368            return 'true'
 369        else:
 370            return 'false'
 371    def INTEGER(self, v):
 372        return "i%s" % v
 373    def REAL(self, v):
 374        return "r%s" % v
 375    def UUID(self, v):
 376        return "u%s" % v
 377    def BINARY(self, v):
 378        return 'b64"' + base64.encodestring(v) + '"'
 379    def STRING(self, v):
 380        if isinstance(v, unicode):
 381            v = v.encode('utf-8')
 382        return "'%s'" % v.replace("\\", "\\\\").replace("'", "\\'")
 383    def URI(self, v):
 384        return 'l"%s"' % str(v).replace("\\", "\\\\").replace('"', '\\"')
 385    def DATE(self, v):
 386        return 'd"%s"' % format_datestr(v)
 387    def ARRAY(self, v):
 388        return "[%s]" % ','.join([self.generate(item) for item in v])
 389    def MAP(self, v):
 390        def fix(key):
 391            if isinstance(key, unicode):
 392                return key.encode('utf-8')
 393            return key
 394        return "{%s}" % ','.join(["'%s':%s" % (fix(key).replace("\\", "\\\\").replace("'", "\\'"), self.generate(value))
 395             for key, value in v.items()])
 396
 397    def generate(self, something):
 398        t = type(something)
 399        handler = self.type_map.get(t)
 400        if handler:
 401            return handler(something)
 402        else:
 403            try:
 404                return self.ARRAY(iter(something))
 405            except TypeError:
 406                raise LLSDSerializationError(
 407                    "Cannot serialize unknown type: %s (%s)" % (t, something))
 408
 409    def format(self, something):
 410        return self.generate(something)
 411
 412def format_notation(something):
 413    return LLSDNotationFormatter().format(something)
 414
 415def _hex_as_nybble(hex):
 416    if (hex >= '0') and (hex <= '9'):
 417        return ord(hex) - ord('0')
 418    elif (hex >= 'a') and (hex <='f'):
 419        return 10 + ord(hex) - ord('a')
 420    elif (hex >= 'A') and (hex <='F'):
 421        return 10 + ord(hex) - ord('A');
 422
 423class LLSDBinaryParser(object):
 424    def __init__(self):
 425        pass
 426
 427    def parse(self, buffer, ignore_binary = False):
 428        """
 429        This is the basic public interface for parsing.
 430
 431        @param buffer the binary data to parse in an indexable sequence.
 432        @param ignore_binary parser throws away data in llsd binary nodes.
 433        @return returns a python object.
 434        """
 435        self._buffer = buffer
 436        self._index = 0
 437        self._keep_binary = not ignore_binary
 438        return self._parse()
 439
 440    def _parse(self):
 441        cc = self._buffer[self._index]
 442        self._index += 1
 443        if cc == '{':
 444            return self._parse_map()
 445        elif cc == '[':
 446            return self._parse_array()
 447        elif cc == '!':
 448            return None
 449        elif cc == '0':
 450            return False
 451        elif cc == '1':
 452            return True
 453        elif cc == 'i':
 454            # 'i' = integer
 455            idx = self._index
 456            self._index += 4
 457            return struct.unpack("!i", self._buffer[idx:idx+4])[0]
 458        elif cc == ('r'):
 459            # 'r' = real number
 460            idx = self._index
 461            self._index += 8
 462            return struct.unpack("!d", self._buffer[idx:idx+8])[0]
 463        elif cc == 'u':
 464            # 'u' = uuid
 465            idx = self._index
 466            self._index += 16
 467            return lluuid.uuid_bits_to_uuid(self._buffer[idx:idx+16])
 468        elif cc == 's':
 469            # 's' = string
 470            return self._parse_string()
 471        elif cc in ("'", '"'):
 472            # delimited/escaped string
 473            return self._parse_string_delim(cc)
 474        elif cc == 'l':
 475            # 'l' = uri
 476            return uri(self._parse_string())
 477        elif cc == ('d'):
 478            # 'd' = date in seconds since epoch
 479            idx = self._index
 480            self._index += 8
 481            seconds = struct.unpack("!d", self._buffer[idx:idx+8])[0]
 482            return datetime.datetime.fromtimestamp(seconds)
 483        elif cc == 'b':
 484            binary = self._parse_string()
 485            if self._keep_binary:
 486                return binary
 487            # *NOTE: maybe have a binary placeholder which has the
 488            # length.
 489            return None
 490        else:
 491            raise LLSDParseError("invalid binary token at byte %d: %d" % (
 492                self._index - 1, ord(cc)))
 493
 494    def _parse_map(self):
 495        rv = {}
 496        size = struct.unpack("!i", self._buffer[self._index:self._index+4])[0]
 497        self._index += 4
 498        count = 0
 499        cc = self._buffer[self._index]
 500        self._index += 1
 501        key = ''
 502        while (cc != '}') and (count < size):
 503            if cc == 'k':
 504                key = self._parse_string()
 505            elif cc in ("'", '"'):
 506                key = self._parse_string_delim(cc)
 507            else:
 508                raise LLSDParseError("invalid map key at byte %d." % (
 509                    self._index - 1,))
 510            value = self._parse()
 511            rv[key] = value
 512            count += 1
 513            cc = self._buffer[self._index]
 514            self._index += 1
 515        if cc != '}':
 516            raise LLSDParseError("invalid map close token at byte %d." % (
 517                self._index,))
 518        return rv
 519
 520    def _parse_array(self):
 521        rv = []
 522        size = struct.unpack("!i", self._buffer[self._index:self._index+4])[0]
 523        self._index += 4
 524        count = 0
 525        cc = self._buffer[self._index]
 526        while (cc != ']') and (count < size):
 527            rv.append(self._parse())
 528            count += 1
 529            cc = self._buffer[self._index]
 530        if cc != ']':
 531            raise LLSDParseError("invalid array close token at byte %d." % (
 532                self._index,))
 533        self._index += 1
 534        return rv
 535
 536    def _parse_string(self):
 537        size = struct.unpack("!i", self._buffer[self._index:self._index+4])[0]
 538        self._index += 4
 539        rv = self._buffer[self._index:self._index+size]
 540        self._index += size
 541        return rv
 542
 543    def _parse_string_delim(self, delim):
 544        list = []
 545        found_escape = False
 546        found_hex = False
 547        found_digit = False
 548        byte = 0
 549        while True:
 550            cc = self._buffer[self._index]
 551            self._index += 1
 552            if found_escape:
 553                if found_hex:
 554                    if found_digit:
 555                        found_escape = False
 556                        found_hex = False
 557                        found_digit = False
 558                        byte <<= 4
 559                        byte |= _hex_as_nybble(cc)
 560                        list.append(chr(byte))
 561                        byte = 0
 562                    else:
 563                        found_digit = True
 564                        byte = _hex_as_nybble(cc)
 565                elif cc == 'x':
 566                    found_hex = True
 567                else:
 568                    if cc == 'a':
 569                        list.append('\a')
 570                    elif cc == 'b':
 571                        list.append('\b')
 572                    elif cc == 'f':
 573                        list.append('\f')
 574                    elif cc == 'n':
 575                        list.append('\n')
 576                    elif cc == 'r':
 577                        list.append('\r')
 578                    elif cc == 't':
 579                        list.append('\t')
 580                    elif cc == 'v':
 581                        list.append('\v')
 582                    else:
 583                        list.append(cc)
 584                    found_escape = False
 585            elif cc == '\\':
 586                found_escape = True
 587            elif cc == delim:
 588                break
 589            else:
 590                list.append(cc)
 591        return ''.join(list)
 592
 593class LLSDNotationParser(object):
 594    """ Parse LLSD notation:
 595    map: { string:object, string:object }
 596    array: [ object, object, object ]
 597    undef: !
 598    boolean: true | false | 1 | 0 | T | F | t | f | TRUE | FALSE
 599    integer: i####
 600    real: r####
 601    uuid: u####
 602    string: "g\'day" | 'have a "nice" day' | s(size)"raw data"
 603    uri: l"escaped"
 604    date: d"YYYY-MM-DDTHH:MM:SS.FFZ"
 605    binary: b##"ff3120ab1" | b(size)"raw data"
 606    """
 607    def __init__(self):
 608        pass
 609
 610    def parse(self, buffer, ignore_binary = False):
 611        """
 612        This is the basic public interface for parsing.
 613
 614        @param buffer the notation string to parse.
 615        @param ignore_binary parser throws away data in llsd binary nodes.
 616        @return returns a python object.
 617        """
 618        if buffer == "":
 619            return False
 620
 621        self._buffer = buffer
 622        self._index = 0
 623        return self._parse()
 624
 625    def _parse(self):
 626        cc = self._buffer[self._index]
 627        self._index += 1
 628        if cc == '{':
 629            return self._parse_map()
 630        elif cc == '[':
 631            return self._parse_array()
 632        elif cc == '!':
 633            return None
 634        elif cc == '0':
 635            return False
 636        elif cc == '1':
 637            return True
 638        elif cc in ('F', 'f'):
 639            self._skip_alpha()
 640            return False
 641        elif cc in ('T', 't'):
 642            self._skip_alpha()
 643            return True
 644        elif cc == 'i':
 645            # 'i' = integer
 646            return self._parse_integer()
 647        elif cc == ('r'):
 648            # 'r' = real number
 649            return self._parse_real()
 650        elif cc == 'u':
 651            # 'u' = uuid
 652            return self._parse_uuid()
 653        elif cc in ("'", '"', 's'):
 654            return self._parse_string(cc)
 655        elif cc == 'l':
 656            # 'l' = uri
 657            delim = self._buffer[self._index]
 658            self._index += 1
 659            val = uri(self._parse_string(delim))
 660            if len(val) == 0:
 661                return None
 662            return val
 663        elif cc == ('d'):
 664            # 'd' = date in seconds since epoch
 665            return self._parse_date()
 666        elif cc == 'b':
 667            return self._parse_binary()
 668        else:
 669            raise LLSDParseError("invalid token at index %d: %d" % (
 670                self._index - 1, ord(cc)))
 671
 672    def _parse_binary(self):
 673        i = self._index
 674        if self._buffer[i:i+2] == '64':
 675            q = self._buffer[i+2]
 676            e = self._buffer.find(q, i+3)
 677            try:
 678                return base64.decodestring(self._buffer[i+3:e])
 679            finally:
 680                self._index = e + 1
 681        else:
 682            raise LLSDParseError('random horrible binary format not supported')
 683
 684    def _parse_map(self):
 685        """ map: { string:object, string:object } """
 686        rv = {}
 687        cc = self._buffer[self._index]
 688        self._index += 1
 689        key = ''
 690        found_key = False
 691        while (cc != '}'):
 692            if not found_key:
 693                if cc in ("'", '"', 's'):
 694                    key = self._parse_string(cc)
 695                    found_key = True
 696                elif cc.isspace() or cc == ',':
 697                    cc = self._buffer[self._index]
 698                    self._index += 1
 699                else:
 700                    raise LLSDParseError("invalid map key at byte %d." % (
 701                                        self._index - 1,))
 702            elif cc.isspace() or cc == ':':
 703                cc = self._buffer[self._index]
 704                self._index += 1
 705                continue
 706            else:
 707                self._index += 1
 708                value = self._parse()
 709                rv[key] = value
 710                found_key = False
 711                cc = self._buffer[self._index]
 712                self._index += 1
 713
 714        return rv
 715
 716    def _parse_array(self):
 717        """ array: [ object, object, object ] """
 718        rv = []
 719        cc = self._buffer[self._index]
 720        while (cc != ']'):
 721            if cc.isspace() or cc == ',':
 722                self._index += 1
 723                cc = self._buffer[self._index]
 724                continue
 725            rv.append(self._parse())
 726            cc = self._buffer[self._index]
 727
 728        if cc != ']':
 729            raise LLSDParseError("invalid array close token at index %d." % (
 730                self._index,))
 731        self._index += 1
 732        return rv
 733
 734    def _parse_uuid(self):
 735        match = re.match(lluuid.UUID.uuid_regex, self._buffer[self._index:])
 736        if not match:
 737            raise LLSDParseError("invalid uuid token at index %d." % self._index)
 738
 739        (start, end) = match.span()
 740        start += self._index
 741        end += self._index
 742        self._index = end
 743        return lluuid.UUID(self._buffer[start:end])
 744
 745    def _skip_alpha(self):
 746        match = re.match(alpha_regex, self._buffer[self._index:])
 747        if match:
 748            self._index += match.end()
 749            
 750    def _parse_date(self):
 751        delim = self._buffer[self._index]
 752        self._index += 1
 753        datestr = self._parse_string(delim)
 754        return parse_datestr(datestr)
 755
 756    def _parse_real(self):
 757        match = re.match(real_regex, self._buffer[self._index:])
 758        if not match:
 759            raise LLSDParseError("invalid real token at index %d." % self._index)
 760
 761        (start, end) = match.span()
 762        start += self._index
 763        end += self._index
 764        self._index = end
 765        return float( self._buffer[start:end] )
 766
 767    def _parse_integer(self):
 768        match = re.match(int_regex, self._buffer[self._index:])
 769        if not match:
 770            raise LLSDParseError("invalid integer token at index %d." % self._index)
 771
 772        (start, end) = match.span()
 773        start += self._index
 774        end += self._index
 775        self._index = end
 776        return int( self._buffer[start:end] )
 777
 778    def _parse_string(self, delim):
 779        """ string: "g\'day" | 'have a "nice" day' | s(size)"raw data" """
 780        rv = ""
 781
 782        if delim in ("'", '"'):
 783            rv = self._parse_string_delim(delim)
 784        elif delim == 's':
 785            rv = self._parse_string_raw()
 786        else:
 787            raise LLSDParseError("invalid string token at index %d." % self._index)
 788
 789        return rv
 790
 791
 792    def _parse_string_delim(self, delim):
 793        """ string: "g'day 'un" | 'have a "nice" day' """
 794        list = []
 795        found_escape = False
 796        found_hex = False
 797        found_digit = False
 798        byte = 0
 799        while True:
 800            cc = self._buffer[self._index]
 801            self._index += 1
 802            if found_escape:
 803                if found_hex:
 804                    if found_digit:
 805                        found_escape = False
 806                        found_hex = False
 807                        found_digit = False
 808                        byte <<= 4
 809                        byte |= _hex_as_nybble(cc)
 810                        list.append(chr(byte))
 811                        byte = 0
 812                    else:
 813                        found_digit = True
 814                        byte = _hex_as_nybble(cc)
 815                elif cc == 'x':
 816                    found_hex = True
 817                else:
 818                    if cc == 'a':
 819                        list.append('\a')
 820                    elif cc == 'b':
 821                        list.append('\b')
 822                    elif cc == 'f':
 823                        list.append('\f')
 824                    elif cc == 'n':
 825                        list.append('\n')
 826                    elif cc == 'r':
 827                        list.append('\r')
 828                    elif cc == 't':
 829                        list.append('\t')
 830                    elif cc == 'v':
 831                        list.append('\v')
 832                    else:
 833                        list.append(cc)
 834                    found_escape = False
 835            elif cc == '\\':
 836                found_escape = True
 837            elif cc == delim:
 838                break
 839            else:
 840                list.append(cc)
 841        return ''.join(list)
 842
 843    def _parse_string_raw(self):
 844        """ string: s(size)"raw data" """
 845        # Read the (size) portion.
 846        cc = self._buffer[self._index]
 847        self._index += 1
 848        if cc != '(':
 849            raise LLSDParseError("invalid string token at index %d." % self._index)
 850
 851        rparen = self._buffer.find(')', self._index)
 852        if rparen == -1:
 853            raise LLSDParseError("invalid string token at index %d." % self._index)
 854
 855        size = int(self._buffer[self._index:rparen])
 856
 857        self._index = rparen + 1
 858        delim = self._buffer[self._index]
 859        self._index += 1
 860        if delim not in ("'", '"'):
 861            raise LLSDParseError("invalid string token at index %d." % self._index)
 862
 863        rv = self._buffer[self._index:(self._index + size)]
 864        self._index += size
 865        cc = self._buffer[self._index]
 866        self._index += 1
 867        if cc != delim:
 868            raise LLSDParseError("invalid string token at index %d." % self._index)
 869
 870        return rv
 871        
 872def format_binary(something):
 873    return '<?llsd/binary?>\n' + _format_binary_recurse(something)
 874
 875def _format_binary_recurse(something):
 876    def _format_list(something):
 877        array_builder = []
 878        array_builder.append('[' + struct.pack('!i', len(something)))
 879        for item in something:
 880            array_builder.append(_format_binary_recurse(item))
 881        array_builder.append(']')
 882        return ''.join(array_builder)
 883
 884    if something is None:
 885        return '!'
 886    elif isinstance(something, LLSD):
 887        return _format_binary_recurse(something.thing)
 888    elif isinstance(something, bool):
 889        if something:
 890            return '1'
 891        else:
 892            return '0'
 893    elif isinstance(something, (int, long)):
 894        return 'i' + struct.pack('!i', something)
 895    elif isinstance(something, float):
 896        return 'r' + struct.pack('!d', something)
 897    elif isinstance(something, lluuid.UUID):
 898        return 'u' + something._bits
 899    elif isinstance(something, binary):
 900        return 'b' + struct.pack('!i', len(something)) + something
 901    elif isinstance(something, str):
 902        return 's' + struct.pack('!i', len(something)) + something
 903    elif isinstance(something, unicode):
 904        something = something.encode('utf-8')
 905        return 's' + struct.pack('!i', len(something)) + something
 906    elif isinstance(something, uri):
 907        return 'l' + struct.pack('!i', len(something)) + something
 908    elif isinstance(something, datetime.datetime):
 909        seconds_since_epoch = time.mktime(something.timetuple())
 910        return 'd' + struct.pack('!d', seconds_since_epoch)
 911    elif isinstance(something, (list, tuple)):
 912        return _format_list(something)
 913    elif isinstance(something, dict):
 914        map_builder = []
 915        map_builder.append('{' + struct.pack('!i', len(something)))
 916        for key, value in something.items():
 917            if isinstance(key, unicode):
 918                key = key.encode('utf-8')
 919            map_builder.append('k' + struct.pack('!i', len(key)) + key)
 920            map_builder.append(_format_binary_recurse(value))
 921        map_builder.append('}')
 922        return ''.join(map_builder)
 923    else:
 924        try:
 925            return _format_list(list(something))
 926        except TypeError:
 927            raise LLSDSerializationError(
 928                "Cannot serialize unknown type: %s (%s)" %
 929                (type(something), something))
 930
 931
 932def parse_binary(binary):
 933    if binary.startswith('<?llsd/binary?>'):
 934        just_binary = binary.split('\n', 1)[1]
 935    else:
 936        just_binary = binary
 937    return LLSDBinaryParser().parse(just_binary)
 938
 939def parse_xml(something):
 940    try:
 941        return to_python(fromstring(something)[0])
 942    except ElementTreeError, err:
 943        raise LLSDParseError(*err.args)
 944
 945def parse_notation(something):
 946    return LLSDNotationParser().parse(something)
 947
 948def parse(something):
 949    try:
 950        something = string.lstrip(something)   #remove any pre-trailing whitespace
 951        if something.startswith('<?llsd/binary?>'):
 952            return parse_binary(something)
 953        # This should be better.
 954        elif something.startswith('<'):
 955            return parse_xml(something)
 956        else:
 957            return parse_notation(something)
 958    except KeyError, e:
 959        raise Exception('LLSD could not be parsed: %s' % (e,))
 960
 961class LLSD(object):
 962    def __init__(self, thing=None):
 963        self.thing = thing
 964
 965    def __str__(self):
 966        return self.toXML(self.thing)
 967
 968    parse = staticmethod(parse)
 969    toXML = staticmethod(format_xml)
 970    toPrettyXML = staticmethod(format_pretty_xml)
 971    toBinary = staticmethod(format_binary)
 972    toNotation = staticmethod(format_notation)
 973
 974
 975undef = LLSD(None)
 976
 977XML_MIME_TYPE = 'application/llsd+xml'
 978BINARY_MIME_TYPE = 'application/llsd+binary'
 979
 980# register converters for llsd in mulib, if it is available
 981try:
 982    from mulib import stacked, mu
 983    stacked.NoProducer()  # just to exercise stacked
 984    mu.safe_load(None)    # just to exercise mu
 985except:
 986    # mulib not available, don't print an error message since this is normal
 987    pass
 988else:
 989    mu.add_parser(parse, XML_MIME_TYPE)
 990    mu.add_parser(parse, 'application/llsd+binary')
 991
 992    def llsd_convert_xml(llsd_stuff, request):
 993        request.write(format_xml(llsd_stuff))
 994
 995    def llsd_convert_binary(llsd_stuff, request):
 996        request.write(format_binary(llsd_stuff))
 997
 998    for typ in [LLSD, dict, list, tuple, str, int, long, float, bool, unicode, type(None)]:
 999        stacked.add_producer(typ, llsd_convert_xml, XML_MIME_TYPE)
1000        stacked.add_producer(typ, llsd_convert_xml, 'application/xml')
1001        stacked.add_producer(typ, llsd_convert_xml, 'text/xml')
1002
1003        stacked.add_producer(typ, llsd_convert_binary, 'application/llsd+binary')
1004
1005    stacked.add_producer(LLSD, llsd_convert_xml, '*/*')
1006
1007    # in case someone is using the legacy mu.xml wrapper, we need to
1008    # tell mu to produce application/xml or application/llsd+xml
1009    # (based on the accept header) from raw xml. Phoenix 2008-07-21
1010    stacked.add_producer(mu.xml, mu.produce_raw, XML_MIME_TYPE)
1011    stacked.add_producer(mu.xml, mu.produce_raw, 'application/xml')
1012
1013
1014
1015# mulib wsgi stuff
1016# try:
1017#     from mulib import mu, adapters
1018#
1019#     # try some known attributes from mulib to be ultra-sure we've imported it
1020#     mu.get_current
1021#     adapters.handlers
1022# except:
1023#     # mulib not available, don't print an error message since this is normal
1024#     pass
1025# else:
1026#     def llsd_xml_handler(content_type):
1027#         def handle_llsd_xml(env, start_response):
1028#             llsd_stuff, _ = mu.get_current(env)
1029#             result = format_xml(llsd_stuff)
1030#             start_response("200 OK", [('Content-Type', content_type)])
1031#             env['mu.negotiated_type'] = content_type
1032#             yield result
1033#         return handle_llsd_xml
1034#    
1035#     def llsd_binary_handler(content_type):
1036#         def handle_llsd_binary(env, start_response):
1037#             llsd_stuff, _ = mu.get_current(env)
1038#             result = format_binary(llsd_stuff)
1039#             start_response("200 OK", [('Content-Type', content_type)])
1040#             env['mu.negotiated_type'] = content_type
1041#             yield result
1042#         return handle_llsd_binary
1043#
1044#     adapters.DEFAULT_PARSERS[XML_MIME_TYPE] = parse
1045    
1046#     for typ in [LLSD, dict, list, tuple, str, int, float, bool, unicode, type(None)]:
1047#         for content_type in (XML_MIME_TYPE, 'application/xml'):
1048#             adapters.handlers.set_handler(typ, llsd_xml_handler(content_type), content_type)
1049#
1050#         adapters.handlers.set_handler(typ, llsd_binary_handler(BINARY_MIME_TYPE), BINARY_MIME_TYPE)
1051#
1052#     adapters.handlers.set_handler(LLSD, llsd_xml_handler(XML_MIME_TYPE), '*/*')