/Lib/encodings/utf_32.py

http://unladen-swallow.googlecode.com/ · Python · 144 lines · 121 code · 13 blank · 10 comment · 25 complexity · 4dec95d19efa7bd1cd8a896399429b98 MD5 · raw file

  1. """
  2. Python 'utf-32' Codec
  3. """
  4. import codecs, sys
  5. ### Codec APIs
  6. encode = codecs.utf_32_encode
  7. def decode(input, errors='strict'):
  8. return codecs.utf_32_decode(input, errors, True)
  9. class IncrementalEncoder(codecs.IncrementalEncoder):
  10. def __init__(self, errors='strict'):
  11. codecs.IncrementalEncoder.__init__(self, errors)
  12. self.encoder = None
  13. def encode(self, input, final=False):
  14. if self.encoder is None:
  15. result = codecs.utf_32_encode(input, self.errors)[0]
  16. if sys.byteorder == 'little':
  17. self.encoder = codecs.utf_32_le_encode
  18. else:
  19. self.encoder = codecs.utf_32_be_encode
  20. return result
  21. return self.encoder(input, self.errors)[0]
  22. def reset(self):
  23. codecs.IncrementalEncoder.reset(self)
  24. self.encoder = None
  25. def getstate(self):
  26. # state info we return to the caller:
  27. # 0: stream is in natural order for this platform
  28. # 2: endianness hasn't been determined yet
  29. # (we're never writing in unnatural order)
  30. return (2 if self.encoder is None else 0)
  31. def setstate(self, state):
  32. if state:
  33. self.encoder = None
  34. else:
  35. if sys.byteorder == 'little':
  36. self.encoder = codecs.utf_32_le_encode
  37. else:
  38. self.encoder = codecs.utf_32_be_encode
  39. class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
  40. def __init__(self, errors='strict'):
  41. codecs.BufferedIncrementalDecoder.__init__(self, errors)
  42. self.decoder = None
  43. def _buffer_decode(self, input, errors, final):
  44. if self.decoder is None:
  45. (output, consumed, byteorder) = \
  46. codecs.utf_32_ex_decode(input, errors, 0, final)
  47. if byteorder == -1:
  48. self.decoder = codecs.utf_32_le_decode
  49. elif byteorder == 1:
  50. self.decoder = codecs.utf_32_be_decode
  51. elif consumed >= 4:
  52. raise UnicodeError("UTF-32 stream does not start with BOM")
  53. return (output, consumed)
  54. return self.decoder(input, self.errors, final)
  55. def reset(self):
  56. codecs.BufferedIncrementalDecoder.reset(self)
  57. self.decoder = None
  58. def getstate(self):
  59. # additonal state info from the base class must be None here,
  60. # as it isn't passed along to the caller
  61. state = codecs.BufferedIncrementalDecoder.getstate(self)[0]
  62. # additional state info we pass to the caller:
  63. # 0: stream is in natural order for this platform
  64. # 1: stream is in unnatural order
  65. # 2: endianness hasn't been determined yet
  66. if self.decoder is None:
  67. return (state, 2)
  68. addstate = int((sys.byteorder == "big") !=
  69. (self.decoder is codecs.utf_32_be_decode))
  70. return (state, addstate)
  71. def setstate(self, state):
  72. # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
  73. codecs.BufferedIncrementalDecoder.setstate(self, state)
  74. state = state[1]
  75. if state == 0:
  76. self.decoder = (codecs.utf_32_be_decode
  77. if sys.byteorder == "big"
  78. else codecs.utf_32_le_decode)
  79. elif state == 1:
  80. self.decoder = (codecs.utf_32_le_decode
  81. if sys.byteorder == "big"
  82. else codecs.utf_32_be_decode)
  83. else:
  84. self.decoder = None
  85. class StreamWriter(codecs.StreamWriter):
  86. def __init__(self, stream, errors='strict'):
  87. self.bom_written = False
  88. codecs.StreamWriter.__init__(self, stream, errors)
  89. def encode(self, input, errors='strict'):
  90. self.bom_written = True
  91. result = codecs.utf_32_encode(input, errors)
  92. if sys.byteorder == 'little':
  93. self.encode = codecs.utf_32_le_encode
  94. else:
  95. self.encode = codecs.utf_32_be_encode
  96. return result
  97. class StreamReader(codecs.StreamReader):
  98. def reset(self):
  99. codecs.StreamReader.reset(self)
  100. try:
  101. del self.decode
  102. except AttributeError:
  103. pass
  104. def decode(self, input, errors='strict'):
  105. (object, consumed, byteorder) = \
  106. codecs.utf_32_ex_decode(input, errors, 0, False)
  107. if byteorder == -1:
  108. self.decode = codecs.utf_32_le_decode
  109. elif byteorder == 1:
  110. self.decode = codecs.utf_32_be_decode
  111. elif consumed>=4:
  112. raise UnicodeError,"UTF-32 stream does not start with BOM"
  113. return (object, consumed)
  114. ### encodings module API
  115. def getregentry():
  116. return codecs.CodecInfo(
  117. name='utf-32',
  118. encode=encode,
  119. decode=decode,
  120. incrementalencoder=IncrementalEncoder,
  121. incrementaldecoder=IncrementalDecoder,
  122. streamreader=StreamReader,
  123. streamwriter=StreamWriter,
  124. )