PageRenderTime 1774ms CodeModel.GetById 840ms app.highlight 20ms RepoModel.GetById 214ms app.codeStats 156ms

/apiary/tools/codec.py

https://bitbucket.org/lindenlab/apiary/
Python | 140 lines | 94 code | 6 blank | 40 comment | 5 complexity | ed94663c4e4c61fec1440978e5d40c2d MD5 | raw file
  1#
  2# $LicenseInfo:firstyear=2010&license=mit$
  3# 
  4# Copyright (c) 2010, Linden Research, Inc.
  5# 
  6# Permission is hereby granted, free of charge, to any person obtaining a copy
  7# of this software and associated documentation files (the "Software"), to deal
  8# in the Software without restriction, including without limitation the rights
  9# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10# copies of the Software, and to permit persons to whom the Software is
 11# furnished to do so, subject to the following conditions:
 12# 
 13# The above copyright notice and this permission notice shall be included in
 14# all copies or substantial portions of the Software.
 15# 
 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22# THE SOFTWARE.
 23# $/LicenseInfo$
 24#
'''
codec provides abstractions for message encodings which live in message queues or data files.
'''
 28
 29import re
 30import simplejson
 31from cStringIO import StringIO
 32
 33
 34class Message (object):
 35    '''
 36    A message has an arbitrary structure of headers and an opaque byte-sequence body.
 37
 38    The headers structure is a dict containing any values which can be encoded with json.
 39    '''
 40    MaxElementSize = 2 ** 27 # 128 MiB
 41    BodyLengthKey = 'body_length'
 42    PrologTemplate = '# Apiary Message: %d header bytes.\n'
 43    PrologPattern = re.compile(r'# Apiary Message: (\d+) header bytes.')
 44    
 45    @classmethod
 46    def decode_many_from_file(cls, fp):
 47        '''
 48        This returns an iterator which yields messages as they are parsed.
 49        '''
 50        m = cls.decode_from_file(fp)
 51        while m:
 52            yield m
 53            m = cls.decode_from_file(fp)
 54
 55    @classmethod
 56    def decode_from_string(cls, source):
 57        return cls.decode_from_file(StringIO(source))
 58    
 59    @classmethod
 60    def decode_from_file(cls, fp):
 61        '''
 62        Given a file-like object, return a Message instance, None, or raise a FormatException.
 63
 64        None is raised if the file is at EOF.
 65        '''
 66        prolog = fp.readline()
 67        if prolog == '':
 68            return None
 69        m = cls.PrologPattern.match(prolog)
 70        if m is None:
 71            raise FormatError('Could not decode prolog: %r', prolog)
 72        hlen = int(m.group(1))
 73        if hlen > cls.MaxElementSize:
 74            raise FormatError('Prolog header length %d is larger than the maximum allowed %d bytes.',
 75                              hlen,
 76                              cls.MaxElementSize)
 77        headerchunk = fp.read(hlen)
 78        newline = fp.read(1)
 79        if newline != '\n':
 80            raise FormatError('Headers not terminated by a newline.')
 81        headers = simplejson.loads(headerchunk)
 82        if type(headers) is not dict:
 83            raise FormatError('Headers must be a mapping, but stream contains: %r',
 84                              headers)
 85        # Translate the keys to utf8 so that ** application works:
 86        headers = dict( ((k.encode('utf8'), v) for (k, v) in headers.items()) )
 87        
 88        bodylength = headers.get(cls.BodyLengthKey)
 89        if type(bodylength) is not int or not (0 <= bodylength < cls.MaxElementSize):
 90            raise FormatError('Invalid %r header: %r',
 91                              cls.BodyLengthKey,
 92                              bodylength)
 93        body = fp.read(bodylength)
 94        newline = fp.read(1)
 95        if newline != '\n':
 96            raise FormatError('Body not terminated by a newline.')
 97        try:
 98            return cls(body, **headers)
 99        except TypeError, e:
100            raise FormatError('Invalid headers - %s: %r',
101                              ' '.join(e.args),
102                              headers)
103            
104    def __init__(self, body, **headers):
105        if headers.has_key(self.BodyLengthKey):
106            lenhdr = headers.pop(self.BodyLengthKey)
107            if len(body) != lenhdr:
108                raise FormatError('Incorrect %s header: %d != %d', self.BodyLengthKey, lenhdr, len(body))
109        self.body = body
110        self.headers = headers
111
112    def encode_to_string(self):
113        f = StringIO()
114        self.encode_to_file(f)
115        return f.getvalue()
116    
117    def encode_to_file(self, fp):
118        hdrs = self.headers.copy()
119        hdrs[self.BodyLengthKey] = len(self.body)
120
121        try:
122            headerchunk = simplejson.dumps(hdrs)
123        except TypeError, e:
124            raise FormatError('Failure to encode headers - %s: %r',
125                              ' '.join(e.args),
126                              hdrs)
127        hlen = len(headerchunk)
128        if hlen > self.MaxElementSize:
129            raise FormatError('Header encoding is %d bytes which is larger than the maximum allowed %d bytes.',
130                              hlen,
131                              self.MaxElementSize)
132        
133        fp.write(self.PrologTemplate % hlen)
134        fp.write(headerchunk + '\n')
135        fp.write(self.body + '\n')
136
137
class FormatError (Exception):
    '''
    An exception whose message is a template %-formatted with positional arguments.
    '''
    def __init__(self, tmpl, *args):
        # Render the message once, up front, so str(exc) shows the final text.
        message = tmpl % args
        super(FormatError, self).__init__(message)