/mutagen/id3.py
https://github.com/amahi/pytivo · Python · 2080 lines · 1815 code · 99 blank · 166 comment · 278 complexity · e76361874a5520350b0f584814fc4cf1 MD5 · raw file
Large files are truncated click here to view the full file
- # id3 support for mutagen
- # Copyright (C) 2005 Michael Urman
- #
- # This program is free software; you can redistribute it and/or modify
- # it under the terms of version 2 of the GNU General Public License as
- # published by the Free Software Foundation.
- #
- # $Id: id3.py 4285 2008-09-06 08:01:31Z piman $
- """ID3v2 reading and writing.
- This is based off of the following references:
- http://www.id3.org/id3v2.4.0-structure.txt
- http://www.id3.org/id3v2.4.0-frames.txt
- http://www.id3.org/id3v2.3.0.html
- http://www.id3.org/id3v2-00.txt
- http://www.id3.org/id3v1.html
- Its largest deviation from the above (versions 2.3 and 2.2) is that it
- will not interpret the / characters as a separator, and will almost
- always accept null separators to generate multi-valued text frames.
- Because ID3 frame structure differs between frame types, each frame is
- implemented as a different class (e.g. TIT2 as mutagen.id3.TIT2). Each
- frame's documentation contains a list of its attributes.
- Since this file's documentation is a little unwieldy, you are probably
- interested in the 'ID3' class to start with.
- """
- __all__ = ['ID3', 'ID3FileType', 'Frames', 'Open', 'delete']
- import struct
- from struct import unpack, pack, error as StructError
- from zlib import error as zlibError
- from warnings import warn
- import mutagen
- from mutagen._util import insert_bytes, delete_bytes, DictProxy
class error(Exception):
    """Base class of every exception defined by this module."""


class ID3NoHeaderError(error, ValueError):
    """The file is too small for, or does not start with, an ID3v2 header."""


class ID3BadUnsynchData(error, ValueError):
    """Frame data could not be un-unsynchronised."""


class ID3BadCompressedData(error, ValueError):
    """Compressed frame data could not be inflated."""


class ID3TagError(error, ValueError):
    """Generic tag error."""


class ID3UnsupportedVersionError(error, NotImplementedError):
    """The tag's major version is not one of ID3v2.2, 2.3 or 2.4."""


class ID3EncryptionUnsupportedError(error, NotImplementedError):
    """The frame is flagged as encrypted, which is not supported."""


class ID3JunkFrameError(error, ValueError):
    """The frame's data could not be parsed; the frame is to be dropped."""


class ID3Warning(error, UserWarning):
    """Warning category used when reporting suspicious but usable data."""
def is_valid_frame_id(frame_id):
    """Return whether frame_id looks like a frame ID.

    The ID must be entirely alphanumeric and must pass str.isupper(),
    i.e. contain at least one cased character and no lowercase ones.
    """
    if not frame_id.isalnum():
        return False
    return frame_id.isupper()
- class ID3(DictProxy, mutagen.Metadata):
- """A file with an ID3v2 tag.
- Attributes:
- version -- ID3 tag version as a tuple
- unknown_frames -- raw frame data of any unknown frames found
- size -- the total size of the ID3 tag, including the header
- """
- PEDANTIC = True
- version = (2, 4, 0)
- filename = None
- size = 0
- __flags = 0
- __readbytes = 0
- __crc = None
- def __init__(self, *args, **kwargs):
- self.unknown_frames = []
- super(ID3, self).__init__(*args, **kwargs)
- def __fullread(self, size):
- try:
- if size < 0:
- raise ValueError('Requested bytes (%s) less than zero' % size)
- if size > self.__filesize:
- raise EOFError('Requested %#x of %#x (%s)' %
- (long(size), long(self.__filesize), self.filename))
- except AttributeError: pass
- data = self.__fileobj.read(size)
- if len(data) != size: raise EOFError
- self.__readbytes += size
- return data
- def load(self, filename, known_frames=None, translate=True):
- """Load tags from a filename.
- Keyword arguments:
- filename -- filename to load tag data from
- known_frames -- dict mapping frame IDs to Frame objects
- translate -- Update all tags to ID3v2.4 internally. Mutagen is
- only capable of writing ID3v2.4 tags, so if you
- intend to save, this must be true.
- Example of loading a custom frame:
- my_frames = dict(mutagen.id3.Frames)
- class XMYF(Frame): ...
- my_frames["XMYF"] = XMYF
- mutagen.id3.ID3(filename, known_frames=my_frames)
- """
- from os.path import getsize
- self.filename = filename
- self.__known_frames = known_frames
- self.__fileobj = open(filename, 'rb')
- self.__filesize = getsize(filename)
- try:
- try:
- self.__load_header()
- except EOFError:
- self.size = 0
- raise ID3NoHeaderError("%s: too small (%d bytes)" %(
- filename, self.__filesize))
- except (ID3NoHeaderError, ID3UnsupportedVersionError), err:
- self.size = 0
- import sys
- stack = sys.exc_info()[2]
- try: self.__fileobj.seek(-128, 2)
- except EnvironmentError: raise err, None, stack
- else:
- frames = ParseID3v1(self.__fileobj.read(128))
- if frames is not None:
- self.version = (1, 1)
- map(self.add, frames.values())
- else: raise err, None, stack
- else:
- frames = self.__known_frames
- if frames is None:
- if (2,3,0) <= self.version: frames = Frames
- elif (2,2,0) <= self.version: frames = Frames_2_2
- data = self.__fullread(self.size - 10)
- for frame in self.__read_frames(data, frames=frames):
- if isinstance(frame, Frame): self.add(frame)
- else: self.unknown_frames.append(frame)
- finally:
- self.__fileobj.close()
- del self.__fileobj
- del self.__filesize
- if translate:
- self.update_to_v24()
- def getall(self, key):
- """Return all frames with a given name (the list may be empty).
- This is best explained by examples:
- id3.getall('TIT2') == [id3['TIT2']]
- id3.getall('TTTT') == []
- id3.getall('TXXX') == [TXXX(desc='woo', text='bar'),
- TXXX(desc='baz', text='quuuux'), ...]
- Since this is based on the frame's HashKey, which is
- colon-separated, you can use it to do things like
- getall('COMM:MusicMatch') or getall('TXXX:QuodLibet:').
- """
- if key in self: return [self[key]]
- else:
- key = key + ":"
- return [v for s,v in self.items() if s.startswith(key)]
- def delall(self, key):
- """Delete all tags of a given kind; see getall."""
- if key in self: del(self[key])
- else:
- key = key + ":"
- for k in filter(lambda s: s.startswith(key), self.keys()):
- del(self[k])
- def setall(self, key, values):
- """Delete frames of the given type and add frames in 'values'."""
- self.delall(key)
- for tag in values:
- self[tag.HashKey] = tag
- def pprint(self):
- """Return tags in a human-readable format.
- "Human-readable" is used loosely here. The format is intended
- to mirror that used for Vorbis or APEv2 output, e.g.
- TIT2=My Title
- However, ID3 frames can have multiple keys:
- POPM=user@example.org=3 128/255
- """
- frames = list(map(Frame.pprint, self.values()))
- frames.sort()
- return "\n".join(frames)
- def loaded_frame(self, tag):
- """Deprecated; use the add method."""
- # turn 2.2 into 2.3/2.4 tags
- if len(type(tag).__name__) == 3: tag = type(tag).__base__(tag)
- self[tag.HashKey] = tag
- # add = loaded_frame (and vice versa) break applications that
- # expect to be able to override loaded_frame (e.g. Quod Libet),
- # as does making loaded_frame call add.
- def add(self, frame):
- """Add a frame to the tag."""
- return self.loaded_frame(frame)
- def __load_header(self):
- fn = self.filename
- data = self.__fullread(10)
- id3, vmaj, vrev, flags, size = unpack('>3sBBB4s', data)
- self.__flags = flags
- self.size = BitPaddedInt(size) + 10
- self.version = (2, vmaj, vrev)
- if id3 != 'ID3':
- raise ID3NoHeaderError("'%s' doesn't start with an ID3 tag" % fn)
- if vmaj not in [2, 3, 4]:
- raise ID3UnsupportedVersionError("'%s' ID3v2.%d not supported"
- % (fn, vmaj))
- if self.PEDANTIC:
- if (2,4,0) <= self.version and (flags & 0x0f):
- raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
- elif (2,3,0) <= self.version < (2,4,0) and (flags & 0x1f):
- raise ValueError("'%s' has invalid flags %#02x" % (fn, flags))
- if self.f_extended:
- extsize = self.__fullread(4)
- if extsize in Frames:
- # Some tagger sets the extended header flag but
- # doesn't write an extended header; in this case, the
- # ID3 data follows immediately. Since no extended
- # header is going to be long enough to actually match
- # a frame, and if it's *not* a frame we're going to be
- # completely lost anyway, this seems to be the most
- # correct check.
- # http://code.google.com/p/quodlibet/issues/detail?id=126
- self.__flags ^= 0x40
- self.__extsize = 0
- self.__fileobj.seek(-4, 1)
- self.__readbytes -= 4
- elif self.version >= (2,4,0):
- # "Where the 'Extended header size' is the size of the whole
- # extended header, stored as a 32 bit synchsafe integer."
- self.__extsize = BitPaddedInt(extsize) - 4
- else:
- # "Where the 'Extended header size', currently 6 or 10 bytes,
- # excludes itself."
- self.__extsize = unpack('>L', extsize)[0]
- if self.__extsize:
- self.__extdata = self.__fullread(self.__extsize)
- else:
- self.__extdata = ""
- def __determine_bpi(self, data, frames, EMPTY="\x00" * 10):
- if self.version < (2, 4, 0):
- return int
- # have to special case whether to use bitpaddedints here
- # spec says to use them, but iTunes has it wrong
- # count number of tags found as BitPaddedInt and how far past
- o = 0
- asbpi = 0
- while o < len(data) - 10:
- part = data[o:o + 10]
- if part == EMPTY:
- bpioff = -((len(data) - o) % 10)
- break
- name, size, flags = unpack('>4sLH', part)
- size = BitPaddedInt(size)
- o += 10 + size
- if name in frames:
- asbpi += 1
- else:
- bpioff = o - len(data)
- # count number of tags found as int and how far past
- o = 0
- asint = 0
- while o < len(data) - 10:
- part = data[o:o + 10]
- if part == EMPTY:
- intoff = -((len(data) - o) % 10)
- break
- name, size, flags = unpack('>4sLH', part)
- o += 10 + size
- if name in frames:
- asint += 1
- else:
- intoff = o - len(data)
- # if more tags as int, or equal and bpi is past and int is not
- if asint > asbpi or (asint == asbpi and (bpioff >= 1 and intoff <= 1)):
- return int
- return BitPaddedInt
- def __read_frames(self, data, frames):
- if self.version < (2,4,0) and self.f_unsynch:
- try: data = unsynch.decode(data)
- except ValueError: pass
- if (2,3,0) <= self.version:
- bpi = self.__determine_bpi(data, frames)
- while data:
- header = data[:10]
- try: name, size, flags = unpack('>4sLH', header)
- except struct.error: return # not enough header
- if name.strip('\x00') == '': return
- size = bpi(size)
- framedata = data[10:10+size]
- data = data[10+size:]
- if size == 0: continue # drop empty frames
- try: tag = frames[name]
- except KeyError:
- if is_valid_frame_id(name): yield header + framedata
- else:
- try: yield self.__load_framedata(tag, flags, framedata)
- except NotImplementedError: yield header + framedata
- except ID3JunkFrameError: pass
- elif (2,2,0) <= self.version:
- while data:
- header = data[0:6]
- try: name, size = unpack('>3s3s', header)
- except struct.error: return # not enough header
- size, = struct.unpack('>L', '\x00'+size)
- if name.strip('\x00') == '': return
- framedata = data[6:6+size]
- data = data[6+size:]
- if size == 0: continue # drop empty frames
- try: tag = frames[name]
- except KeyError:
- if is_valid_frame_id(name): yield header + framedata
- else:
- try: yield self.__load_framedata(tag, 0, framedata)
- except NotImplementedError: yield header + framedata
- except ID3JunkFrameError: pass
- def __load_framedata(self, tag, flags, framedata):
- return tag.fromData(self, flags, framedata)
-
- f_unsynch = property(lambda s: bool(s.__flags & 0x80))
- f_extended = property(lambda s: bool(s.__flags & 0x40))
- f_experimental = property(lambda s: bool(s.__flags & 0x20))
- f_footer = property(lambda s: bool(s.__flags & 0x10))
- #f_crc = property(lambda s: bool(s.__extflags & 0x8000))
- def save(self, filename=None, v1=1):
- """Save changes to a file.
- If no filename is given, the one most recently loaded is used.
- Keyword arguments:
- v1 -- if 0, ID3v1 tags will be removed
- if 1, ID3v1 tags will be updated but not added
- if 2, ID3v1 tags will be created and/or updated
- The lack of a way to update only an ID3v1 tag is intentional.
- """
- # Sort frames by 'importance'
- order = ["TIT2", "TPE1", "TRCK", "TALB", "TPOS", "TDRC", "TCON"]
- order = dict(zip(order, range(len(order))))
- last = len(order)
- frames = self.items()
- frames.sort(lambda a, b: cmp(order.get(a[0][:4], last),
- order.get(b[0][:4], last)))
- framedata = [self.__save_frame(frame) for (key, frame) in frames]
- framedata.extend([data for data in self.unknown_frames
- if len(data) > 10])
- if not framedata:
- try:
- self.delete(filename)
- except EnvironmentError, err:
- from errno import ENOENT
- if err.errno != ENOENT: raise
- return
- framedata = ''.join(framedata)
- framesize = len(framedata)
- if filename is None: filename = self.filename
- try: f = open(filename, 'rb+')
- except IOError, err:
- from errno import ENOENT
- if err.errno != ENOENT: raise
- f = open(filename, 'ab') # create, then reopen
- f = open(filename, 'rb+')
- try:
- idata = f.read(10)
- try: id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
- except struct.error: id3, insize = '', 0
- insize = BitPaddedInt(insize)
- if id3 != 'ID3': insize = -10
- if insize >= framesize: outsize = insize
- else: outsize = (framesize + 1023) & ~0x3FF
- framedata += '\x00' * (outsize - framesize)
- framesize = BitPaddedInt.to_str(outsize, width=4)
- flags = 0
- header = pack('>3sBBB4s', 'ID3', 4, 0, flags, framesize)
- data = header + framedata
- if (insize < outsize):
- insert_bytes(f, outsize-insize, insize+10)
- f.seek(0)
- f.write(data)
- try:
- f.seek(-128, 2)
- except IOError, err:
- # If the file is too small, that's OK - it just means
- # we're certain it doesn't have a v1 tag.
- from errno import EINVAL
- if err.errno != EINVAL:
- # If we failed to see for some other reason, bail out.
- raise
- # Since we're sure this isn't a v1 tag, don't read it.
- f.seek(0, 2)
- data = f.read(128)
- try:
- idx = data.index("TAG")
- except ValueError:
- offset = 0
- has_v1 = False
- else:
- offset = idx - len(data)
- has_v1 = True
-
- f.seek(offset, 2)
- if v1 == 1 and has_v1 or v1 == 2:
- f.write(MakeID3v1(self))
- else:
- f.truncate()
- finally:
- f.close()
- def delete(self, filename=None, delete_v1=True, delete_v2=True):
- """Remove tags from a file.
- If no filename is given, the one most recently loaded is used.
- Keyword arguments:
- delete_v1 -- delete any ID3v1 tag
- delete_v2 -- delete any ID3v2 tag
- """
- if filename is None:
- filename = self.filename
- delete(filename, delete_v1, delete_v2)
- self.clear()
- def __save_frame(self, frame):
- flags = 0
- if self.PEDANTIC and isinstance(frame, TextFrame):
- if len(str(frame)) == 0: return ''
- framedata = frame._writeData()
- usize = len(framedata)
- if usize > 2048:
- # Disabled as this causes iTunes and other programs
- # to fail to find these frames, which usually includes
- # e.g. APIC.
- #framedata = BitPaddedInt.to_str(usize) + framedata.encode('zlib')
- #flags |= Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN
- pass
- datasize = BitPaddedInt.to_str(len(framedata), width=4)
- header = pack('>4s4sH', type(frame).__name__, datasize, flags)
- return header + framedata
- def update_to_v24(self):
- """Convert older tags into an ID3v2.4 tag.
- This updates old ID3v2 frames to ID3v2.4 ones (e.g. TYER to
- TDRC). If you intend to save tags, you must call this function
- at some point; it is called by default when loading the tag.
- """
- if self.version < (2,3,0): del self.unknown_frames[:]
- # unsafe to write
- # TDAT, TYER, and TIME have been turned into TDRC.
- try:
- if str(self.get("TYER", "")).strip("\x00"):
- date = str(self.pop("TYER"))
- if str(self.get("TDAT", "")).strip("\x00"):
- dat = str(self.pop("TDAT"))
- date = "%s-%s-%s" % (date, dat[2:], dat[:2])
- if str(self.get("TIME", "")).strip("\x00"):
- time = str(self.pop("TIME"))
- date += "T%s:%s:00" % (time[:2], time[2:])
- if "TDRC" not in self:
- self.add(TDRC(encoding=0, text=date))
- except UnicodeDecodeError:
- # Old ID3 tags have *lots* of Unicode problems, so if TYER
- # is bad, just chuck the frames.
- pass
- # TORY can be the first part of a TDOR.
- if "TORY" in self:
- f = self.pop("TORY")
- if "TDOR" not in self:
- try:
- self.add(TDOR(encoding=0, text=str(f)))
- except UnicodeDecodeError:
- pass
- # IPLS is now TIPL.
- if "IPLS" in self:
- f = self.pop("IPLS")
- if "TIPL" not in self:
- self.add(TIPL(encoding=f.encoding, people=f.people))
- if "TCON" in self:
- # Get rid of "(xx)Foobr" format.
- self["TCON"].genres = self["TCON"].genres
- if self.version < (2, 3):
- # ID3v2.2 PIC frames are slightly different.
- pics = self.getall("APIC")
- mimes = { "PNG": "image/png", "JPG": "image/jpeg" }
- self.delall("APIC")
- for pic in pics:
- newpic = APIC(
- encoding=pic.encoding, mime=mimes.get(pic.mime, pic.mime),
- type=pic.type, desc=pic.desc, data=pic.data)
- self.add(newpic)
- # ID3v2.2 LNK frames are just way too different to upgrade.
- self.delall("LINK")
- # These can't be trivially translated to any ID3v2.4 tags, or
- # should have been removed already.
- for key in ["RVAD", "EQUA", "TRDA", "TSIZ", "TDAT", "TIME", "CRM"]:
- if key in self: del(self[key])
def delete(filename, delete_v1=True, delete_v2=True):
    """Remove tags from a file.

    Keyword arguments:
    delete_v1 -- delete any ID3v1 tag
    delete_v2 -- delete any ID3v2 tag
    """
    f = open(filename, 'rb+')
    try:
        if delete_v1:
            try:
                f.seek(-128, 2)
            except IOError:
                # File shorter than 128 bytes: it cannot hold a v1 tag.
                pass
            else:
                if f.read(3) == "TAG":
                    f.seek(-128, 2)
                    f.truncate()

        # technically an insize=0 tag is invalid, but we delete it anyway
        # (primarily because we used to write it)
        if delete_v2:
            f.seek(0, 0)
            idata = f.read(10)
            try:
                id3, vmaj, vrev, flags, insize = unpack('>3sBBB4s', idata)
            except struct.error:
                # Fewer than 10 bytes: no ID3v2 header, nothing to delete.
                # (Never feed a negative placeholder to BitPaddedInt; its
                # decoding loop would not terminate.)
                pass
            else:
                if id3 == 'ID3':
                    delete_bytes(f, BitPaddedInt(insize) + 10, 0)
    finally:
        f.close()
- class BitPaddedInt(int):
- def __new__(cls, value, bits=7, bigendian=True):
- "Strips 8-bits bits out of every byte"
- mask = (1<<(bits))-1
- if isinstance(value, (int, long)):
- bytes = []
- while value:
- bytes.append(value & ((1<<bits)-1))
- value = value >> 8
- if isinstance(value, str):
- bytes = [ord(byte) & mask for byte in value]
- if bigendian: bytes.reverse()
- numeric_value = 0
- for shift, byte in zip(range(0, len(bytes)*bits, bits), bytes):
- numeric_value += byte << shift
- if isinstance(numeric_value, long):
- self = long.__new__(BitPaddedLong, numeric_value)
- else:
- self = int.__new__(BitPaddedInt, numeric_value)
- self.bits = bits
- self.bigendian = bigendian
- return self
- def as_str(value, bits=7, bigendian=True, width=4):
- bits = getattr(value, 'bits', bits)
- bigendian = getattr(value, 'bigendian', bigendian)
- value = int(value)
- mask = (1<<bits)-1
- bytes = []
- while value:
- bytes.append(value & mask)
- value = value >> bits
- # PCNT and POPM use growing integers of at least 4 bytes as counters.
- if width == -1: width = max(4, len(bytes))
- if len(bytes) > width:
- raise ValueError, 'Value too wide (%d bytes)' % len(bytes)
- else: bytes.extend([0] * (width-len(bytes)))
- if bigendian: bytes.reverse()
- return ''.join(map(chr, bytes))
- to_str = staticmethod(as_str)
- class BitPaddedLong(long):
- def as_str(value, bits=7, bigendian=True, width=4):
- return BitPaddedInt.to_str(value, bits, bigendian, width)
- to_str = staticmethod(as_str)
class unsynch(object):
    """Namespace for the unsynchronisation encode/decode functions."""

    def decode(value):
        """Undo unsynchronisation: drop a '\\x00' that follows '\\xFF'.

        Raises ValueError when '\\xFF' is followed by a byte >= '\\xE0'
        or is the last byte of the string.
        """
        decoded = []
        after_ff = False
        for char in value:
            if after_ff:
                if char >= '\xE0':
                    raise ValueError('invalid sync-safe string')
                if char != '\x00':
                    decoded.append(char)
                after_ff = False
            else:
                decoded.append(char)
                after_ff = (char == '\xFF')
        if after_ff:
            raise ValueError('string ended unsafe')
        return ''.join(decoded)
    decode = staticmethod(decode)

    def encode(value):
        """Apply unsynchronisation.

        A '\\x00' is inserted after every '\\xFF' that is followed by
        '\\x00' or a byte >= '\\xE0', or that ends the string.
        """
        encoded = []
        after_ff = False
        for char in value:
            if after_ff and (char == '\x00' or char >= '\xE0'):
                encoded.append('\x00')
            encoded.append(char)
            after_ff = (char == '\xFF')
        if after_ff:
            encoded.append('\x00')
        return ''.join(encoded)
    encode = staticmethod(encode)
class Spec(object):
    """Base class for the description of one field of a frame.

    'name' is the attribute name under which frames store the field.
    """

    def __init__(self, name):
        self.name = name

    def __hash__(self):
        # Specs are deliberately unusable as dict keys or set members.
        raise TypeError("Spec objects are unhashable")
class ByteSpec(Spec):
    """A field stored as a single byte and exposed as an integer."""

    def read(self, frame, data):
        """Return (ordinal of the first byte, remaining data)."""
        head, rest = data[0], data[1:]
        return ord(head), rest

    def write(self, frame, value):
        """Return the one-character string for 'value'."""
        return chr(value)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class IntegerSpec(Spec):
    """An integer that occupies all of the remaining frame data."""

    def read(self, frame, data):
        """Decode all of 'data' as 8-bit bytes; nothing is left over."""
        number = BitPaddedInt(data, bits=8)
        return int(number), ''

    def write(self, frame, value):
        """Encode 'value' in as many bytes as needed, but at least 4."""
        return BitPaddedInt.to_str(value, bits=8, width=-1)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class SizedIntegerSpec(Spec):
    """An integer stored in a fixed number of bytes."""

    def __init__(self, name, size):
        self.name = name
        self.__sz = size

    def read(self, frame, data):
        """Decode the first 'size' bytes; return (int, remaining data)."""
        width = self.__sz
        number = BitPaddedInt(data[:width], bits=8)
        return int(number), data[width:]

    def write(self, frame, value):
        """Encode 'value' in exactly 'size' bytes."""
        return BitPaddedInt.to_str(value, bits=8, width=self.__sz)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class EncodingSpec(ByteSpec):
    """The one-byte text-encoding marker at the start of text frames."""

    def read(self, frame, data):
        """Return (encoding, remaining data).

        A first byte of 16 or more is treated as text rather than a
        marker: it is put back on the data and encoding 0 is returned.
        """
        enc, rest = super(EncodingSpec, self).read(frame, data)
        if enc >= 16:
            return 0, chr(enc) + rest
        return enc, rest

    def validate(self, frame, value):
        """Accept 0 through 3 or None; raise ValueError otherwise."""
        if 0 <= value <= 3:
            return value
        if value is None:
            return None
        raise ValueError('Invalid Encoding: %r' % value)
class StringSpec(Spec):
    """A fixed-length string field."""

    def __init__(self, name, length):
        super(StringSpec, self).__init__(name)
        self.len = length

    def read(self, frame, data):
        """Split off the first 'len' bytes; return (field, remaining data)."""
        size = self.len
        return data[:size], data[size:]

    def write(self, frame, value):
        """Return str(value) NUL-padded or cut to exactly 'len' bytes.

        None is written as 'len' NUL bytes.
        """
        padding = '\x00' * self.len
        if value is None:
            return padding
        return (str(value) + padding)[:self.len]

    def validate(self, frame, value):
        """Accept None or a string of exactly 'len' characters."""
        if value is None:
            return None
        if isinstance(value, basestring) and len(value) == self.len:
            return value
        raise ValueError(
            'Invalid StringSpec[%d] data: %r' % (self.len, value))
class BinaryDataSpec(Spec):
    """Opaque binary data that takes up the rest of the frame."""

    def read(self, frame, data):
        """Consume all of 'data'; nothing is left over."""
        return data, ''

    def write(self, frame, value):
        """Return str(value)."""
        return str(value)

    def validate(self, frame, value):
        """Coerce the value with str()."""
        return str(value)
class EncodedTextSpec(Spec):
    """A terminated text field encoded according to frame.encoding."""

    # Okay, seriously. This is private and defined explicitly and
    # completely by the ID3 specification. You can't just add
    # encodings here however you want.
    # Each entry is (codec name, terminator), indexed by frame.encoding.
    _encodings = ( ('latin1', '\x00'), ('utf16', '\x00\x00'),
                   ('utf_16_be', '\x00\x00'), ('utf8', '\x00') )

    def read(self, frame, data):
        """Return (decoded text up to the terminator, remaining data).

        Without a terminator the whole of 'data' is the text. Raises
        ID3JunkFrameError when frame.encoding is not a known encoding.
        """
        try:
            enc, term = self._encodings[frame.encoding]
        except IndexError:
            # EncodingSpec.read lets marker bytes 4..15 through; drop
            # such a frame as junk rather than abort the whole tag load.
            raise ID3JunkFrameError
        ret = ''
        if len(term) == 1:
            if term in data:
                data, ret = data.split(term, 1)
        else:
            # A two-byte terminator only counts at an even offset;
            # otherwise it straddles two code units.
            offset = -1
            try:
                while True:
                    offset = data.index(term, offset+1)
                    if offset & 1: continue
                    data, ret = data[0:offset], data[offset+2:]
                    break
            except ValueError:
                pass

        if len(data) < len(term): return u'', ret
        return data.decode(enc), ret

    def write(self, frame, value):
        """Encode 'value' and append the encoding's terminator."""
        enc, term = self._encodings[frame.encoding]
        return value.encode(enc) + term

    def validate(self, frame, value):
        """Coerce the value to unicode."""
        return unicode(value)
class MultiSpec(Spec):
    """A field repeated until the frame data runs out.

    With one sub-spec the value is a flat list; with several, each
    entry is a list holding one value per sub-spec. The optional 'sep'
    keyword lets validate() split a plain string into a list.
    """

    def __init__(self, name, *specs, **kw):
        super(MultiSpec, self).__init__(name)
        self.specs = specs
        self.sep = kw.get('sep')

    def read(self, frame, data):
        """Apply the sub-specs in turn until 'data' is exhausted."""
        single = len(self.specs) == 1
        values = []
        while data:
            record = []
            for spec in self.specs:
                value, data = spec.read(frame, data)
                record.append(value)
            if single:
                values.append(record[0])
            else:
                values.append(record)
        return values, data

    def write(self, frame, value):
        """Concatenate the sub-spec encodings of every entry."""
        if len(self.specs) == 1:
            only = self.specs[0]
            chunks = [only.write(frame, v) for v in value]
        else:
            chunks = []
            for record in value:
                chunks.extend(
                    [s.write(frame, v) for v, s in zip(record, self.specs)])
        return ''.join(chunks)

    def validate(self, frame, value):
        """Validate every entry with the sub-specs.

        None becomes an empty list and, when 'sep' is set, a string is
        split on it first. Anything that is not a list by then raises
        ValueError.
        """
        if value is None:
            return []
        if self.sep and isinstance(value, basestring):
            value = value.split(self.sep)
        if not isinstance(value, list):
            raise ValueError('Invalid MultiSpec data: %r' % value)
        if len(self.specs) == 1:
            only = self.specs[0]
            return [only.validate(frame, v) for v in value]
        return [
            [s.validate(frame, v) for (v, s) in zip(val, self.specs)]
            for val in value]
class EncodedNumericTextSpec(EncodedTextSpec):
    """Marker subclass of EncodedTextSpec; adds no behaviour of its own."""


class EncodedNumericPartTextSpec(EncodedTextSpec):
    """Marker subclass of EncodedTextSpec; adds no behaviour of its own."""
class Latin1TextSpec(EncodedTextSpec):
    """NUL-terminated text that is always Latin-1, whatever frame.encoding."""

    def read(self, frame, data):
        """Return (text before the first NUL, data after it)."""
        ret = ''
        if '\x00' in data:
            data, ret = data.split('\x00', 1)
        return data.decode('latin1'), ret

    def write(self, data, value):
        """Encode 'value' as Latin-1 and append a NUL terminator."""
        encoded = value.encode('latin1')
        return encoded + '\x00'

    def validate(self, frame, value):
        """Coerce the value to unicode."""
        return unicode(value)
class ID3TimeStamp(object):
    """A time stamp in ID3v2 format.

    This is a restricted form of the ISO 8601 standard; time stamps
    take the form of:
        YYYY-MM-DD HH:MM:SS
    Or some partial form (YYYY-MM-DD HH, YYYY, etc.).

    The 'text' attribute contains the raw text data of the time stamp.
    """

    import re

    def __init__(self, text):
        if isinstance(text, ID3TimeStamp):
            text = text.text
        self.text = text

    # Per-field format and the separator that follows the field.
    __formats = ['%04d'] + ['%02d'] * 5
    __seps = ['-', '-', ' ', ':', ':', 'x']

    def get_text(self):
        """Format the leading fields up to the first one that is None."""
        fields = (self.year, self.month, self.day,
                  self.hour, self.minute, self.second)
        pieces = []
        for index, field in enumerate(fields):
            if field is None:
                break
            pieces.append(self.__formats[index] % field + self.__seps[index])
        # Drop the separator that trails the last formatted field.
        return u''.join(pieces)[:-1]

    def set_text(self, text, splitre=re.compile('[-T:/.]|\s+')):
        """Split 'text' into the six fields; non-numeric ones become None."""
        # The padding guarantees at least six pieces.
        fields = splitre.split(text + ':::::')[:6]
        names = 'year month day hour minute second'.split()
        for name, field in zip(names, fields):
            try:
                number = int(field)
            except ValueError:
                number = None
            setattr(self, name, number)

    text = property(get_text, set_text, doc="ID3v2.4 date and time.")

    def __str__(self):
        return self.text

    def __repr__(self):
        return repr(self.text)

    def __cmp__(self, other):
        return cmp(self.text, other.text)

    __hash__ = object.__hash__

    def encode(self, *args):
        """Encode the text form; arguments are passed to the text's encode()."""
        return self.text.encode(*args)
class TimeStampSpec(EncodedTextSpec):
    """An encoded text field whose value is an ID3TimeStamp."""

    def read(self, frame, data):
        """Read the text and wrap it in an ID3TimeStamp."""
        text, rest = super(TimeStampSpec, self).read(frame, data)
        return self.validate(frame, text), rest

    def write(self, frame, data):
        """Write the stamp's text with every space replaced by 'T'."""
        stamp_text = data.text.replace(' ', 'T')
        return super(TimeStampSpec, self).write(frame, stamp_text)

    def validate(self, frame, value):
        """Return an ID3TimeStamp; a TypeError becomes ValueError."""
        try:
            return ID3TimeStamp(value)
        except TypeError:
            raise ValueError("Invalid ID3TimeStamp: %r" % value)
class ChannelSpec(ByteSpec):
    """A ByteSpec that also names the channel codes 0 through 8."""

    OTHER = 0
    MASTER = 1
    FRONTRIGHT = 2
    FRONTLEFT = 3
    BACKRIGHT = 4
    BACKLEFT = 5
    FRONTCENTRE = 6
    BACKCENTRE = 7
    SUBWOOFER = 8
class VolumeAdjustmentSpec(Spec):
    """A volume adjustment stored as a signed 16-bit value in 1/512 units."""

    def read(self, frame, data):
        """Return (adjustment as a float, remaining data).

        Raises ID3JunkFrameError when fewer than two bytes are left, so
        a truncated frame is dropped like other junk instead of
        surfacing as a struct.error.
        """
        if len(data) < 2:
            raise ID3JunkFrameError
        value, = unpack('>h', data[0:2])
        return value/512.0, data[2:]

    def write(self, frame, value):
        """Encode 'value' rounded to the nearest 1/512."""
        return pack('>h', int(round(value * 512)))

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class VolumePeakSpec(Spec):
    """A peak volume: a bit-count byte followed by that many bits of peak."""

    def read(self, frame, data):
        """Return (peak scaled by 1/(2**31 - 1), remaining data).

        At most four peak bytes are used. Raises ID3JunkFrameError when
        the data is shorter than the bit count requires.
        """
        # http://bugs.xmms.org/attachment.cgi?id=113&action=view
        bits = ord(data[0])
        nbytes = min(4, (bits + 7) >> 3)
        # not enough frame data
        if nbytes + 1 > len(data):
            raise ID3JunkFrameError
        shift = ((8 - (bits & 7)) & 7) + (4 - nbytes) * 8
        peak = 0
        for char in data[1:nbytes + 1]:
            peak = peak * 256 + ord(char)
        peak *= 2 ** shift
        return (float(peak) / (2**31-1)), data[1 + nbytes:]

    def write(self, frame, value):
        """Encode the peak as a 16-bit value."""
        # always write as 16 bits for sanity.
        scaled = int(round(value * 32768))
        return "\x10" + pack('>H', scaled)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class SynchronizedTextSpec(EncodedTextSpec):
    """A sequence of (text, time) pairs filling the rest of the frame.

    Each pair is stored as terminated encoded text followed by a
    big-endian unsigned 32-bit time.
    """

    def read(self, frame, data):
        """Return (list of (text, time) tuples, "").

        Raises ID3JunkFrameError when the encoding is unknown, a
        terminator is missing, or a time stamp is truncated.
        """
        texts = []
        try:
            encoding, term = self._encodings[frame.encoding]
        except IndexError:
            raise ID3JunkFrameError
        l = len(term)
        while data:
            # Find the terminator. A two-byte terminator only counts at
            # an even offset; an odd match straddles two code units
            # (e.g. 'A\x00' followed by '\x00\x00').
            value_idx = -1
            while True:
                try:
                    value_idx = data.index(term, value_idx + 1)
                except ValueError:
                    raise ID3JunkFrameError
                if l == 1 or not (value_idx & 1):
                    break
            value = data[:value_idx].decode(encoding)
            stamp = data[value_idx+l:value_idx+l+4]
            if len(stamp) < 4:
                # Truncated time: junk, not a raw struct.error.
                raise ID3JunkFrameError
            time, = struct.unpack(">I", stamp)
            texts.append((value, time))
            data = data[value_idx+l+4:]
        return texts, ""

    def write(self, frame, value):
        """Encode every (text, time) pair in 'value'."""
        data = []
        encoding, term = self._encodings[frame.encoding]
        for text, time in value:
            text = text.encode(encoding) + term
            data.append(text + struct.pack(">I", time))
        return "".join(data)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class KeyEventSpec(Spec):
    """A sequence of 5-byte events: a signed byte and an unsigned 32-bit int."""

    def read(self, frame, data):
        """Return (list of event tuples, trailing bytes too short for one)."""
        usable = (len(data) // 5) * 5
        events = [struct.unpack(">bI", data[start:start + 5])
                  for start in range(0, usable, 5)]
        return events, data[usable:]

    def write(self, frame, value):
        """Pack every event tuple in 'value'."""
        packed = []
        for event in value:
            packed.append(struct.pack(">bI", *event))
        return "".join(packed)

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class VolumeAdjustmentsSpec(Spec):
    """A list of (frequency, adjustment) pairs.

    Each pair is stored as an unsigned 16-bit frequency in half units
    and a signed 16-bit adjustment in 1/512 units.
    """
    # Not to be confused with VolumeAdjustmentSpec.

    def read(self, frame, data):
        """Return (sorted list of (freq, adj) pairs, leftover data).

        A later entry for the same frequency replaces an earlier one.
        """
        adjustments = {}
        while len(data) >= 4:
            freq, adj = struct.unpack(">Hh", data[:4])
            data = data[4:]
            freq /= 2.0
            adj /= 512.0
            adjustments[freq] = adj
        adjustments = adjustments.items()
        adjustments.sort()
        return adjustments, data

    def write(self, frame, value):
        """Pack the pairs in ascending order.

        A copy is sorted so that writing a frame does not reorder the
        caller's list as a side effect.
        """
        pairs = list(value)
        pairs.sort()
        return "".join([struct.pack(">Hh", int(freq * 2), int(adj * 512))
                        for (freq, adj) in pairs])

    def validate(self, frame, value):
        """Accept any value unchanged."""
        return value
class ASPIIndexSpec(Spec):
    """The index table of a frame: frame.N entries of frame.b bits each."""

    def read(self, frame, data):
        """Return (list of frame.N index values, remaining data).

        An unsupported frame.b (neither 8 nor 16) warns and yields an
        empty list. Raises ID3JunkFrameError when the data holds fewer
        than frame.N entries.
        """
        if frame.b == 16:
            format = "H"
            size = 2
        elif frame.b == 8:
            format = "B"
            size = 1
        else:
            warn("invalid bit count in ASPI (%d)" % frame.b, ID3Warning)
            return [], data

        wanted = frame.N * size
        indexes = data[:wanted]
        if len(indexes) < wanted:
            # Truncated table: junk, not a raw struct.error.
            raise ID3JunkFrameError
        data = data[wanted:]
        return list(struct.unpack(">" + format * frame.N, indexes)), data

    def write(self, frame, values):
        """Pack frame.N values; raises ValueError unless frame.b is 8 or 16."""
        if frame.b == 16: format = "H"
        elif frame.b == 8: format = "B"
        else: raise ValueError("frame.b must be 8 or 16")
        return struct.pack(">" + format * frame.N, *values)

    def validate(self, frame, values):
        """Accept any value unchanged."""
        return values
- class Frame(object):
- """Fundamental unit of ID3 data.
- ID3 tags are split into frames. Each frame has a potentially
- different structure, and so this base class is not very featureful.
- """
- FLAG23_ALTERTAG = 0x8000
- FLAG23_ALTERFILE = 0x4000
- FLAG23_READONLY = 0x2000
- FLAG23_COMPRESS = 0x0080
- FLAG23_ENCRYPT = 0x0040
- FLAG23_GROUP = 0x0020
- FLAG24_ALTERTAG = 0x4000
- FLAG24_ALTERFILE = 0x2000
- FLAG24_READONLY = 0x1000
- FLAG24_GROUPID = 0x0040
- FLAG24_COMPRESS = 0x0008
- FLAG24_ENCRYPT = 0x0004
- FLAG24_UNSYNCH = 0x0002
- FLAG24_DATALEN = 0x0001
- _framespec = []
- def __init__(self, *args, **kwargs):
- if len(args)==1 and len(kwargs)==0 and isinstance(args[0], type(self)):
- other = args[0]
- for checker in self._framespec:
- val = checker.validate(self, getattr(other, checker.name))
- setattr(self, checker.name, val)
- else:
- for checker, val in zip(self._framespec, args):
- setattr(self, checker.name, checker.validate(self, val))
- for checker in self._framespec[len(args):]:
- validated = checker.validate(
- self, kwargs.get(checker.name, None))
- setattr(self, checker.name, validated)
- HashKey = property(
- lambda s: s.FrameID,
- doc="an internal key used to ensure frame uniqueness in a tag")
- FrameID = property(
- lambda s: type(s).__name__,
- doc="ID3v2 three or four character frame ID")
- def __repr__(self):
- """Python representation of a frame.
- The string returned is a valid Python expression to construct
- a copy of this frame.
- """
- kw = []
- for attr in self._framespec:
- kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
- return '%s(%s)' % (type(self).__name__, ', '.join(kw))
- def _readData(self, data):
- odata = data
- for reader in self._framespec:
- if len(data):
- try: value, data = reader.read(self, data)
- except UnicodeDecodeError:
- raise ID3JunkFrameError
- else: raise ID3JunkFrameError
- setattr(self, reader.name, value)
- if data.strip('\x00'):
- warn('Leftover data: %s: %r (from %r)' % (
- type(self).__name__, data, odata),
- ID3Warning)
- def _writeData(self):
- data = []
- for writer in self._framespec:
- data.append(writer.write(self, getattr(self, writer.name)))
- return ''.join(data)
- def pprint(self):
- """Return a human-readable representation of the frame."""
- return "%s=%s" % (type(self).__name__, self._pprint())
- def _pprint(self):
- return "[unrepresentable data]"
- def fromData(cls, id3, tflags, data):
- """Construct this ID3 frame from raw string data."""
- if (2,4,0) <= id3.version:
- if tflags & (Frame.FLAG24_COMPRESS | Frame.FLAG24_DATALEN):
- # The data length int is syncsafe in 2.4 (but not 2.3).
- # However, we don't actually need the data length int,
- # except to work around a QL 0.12 bug, and in that case
- # all we need are the raw bytes.
- datalen_bytes = data[:4]
- data = data[4:]
- if tflags & Frame.FLAG24_UNSYNCH or id3.f_unsynch:
- try: data = unsynch.decode(data)
- except ValueError, err:
- if id3.PEDANTIC:
- raise ID3BadUnsynchData, '%s: %r' % (err, data)
- if tflags & Frame.FLAG24_ENCRYPT:
- raise ID3EncryptionUnsupportedError
- if tflags & Frame.FLAG24_COMPRESS:
- try: data = data.decode('zlib')
- except zlibError, err:
- # the initial mutagen that went out with QL 0.12 did not
- # write the 4 bytes of uncompressed size. Compensate.
- data = datalen_bytes + data
- try: data = data.decode('zlib')
- except zlibError, err:
- if id3.PEDANTIC:
- raise ID3BadCompressedData, '%s: %r' % (err, data)
- elif (2,3,0) <= id3.version:
- if tflags & Frame.FLAG23_COMPRESS:
- usize, = unpack('>L', data[:4])
- data = data[4:]
- if tflags & Frame.FLAG23_ENCRYPT:
- raise ID3EncryptionUnsupportedError
- if tflags & Frame.FLAG23_COMPRESS:
- try: data = data.decode('zlib')
- except zlibError, err:
- if id3.PEDANTIC:
- raise ID3BadCompressedData, '%s: %r' % (err, data)
- frame = cls()
- frame._rawdata = data
- frame._flags = tflags
- frame._readData(data)
- return frame
- fromData = classmethod(fromData)
def __hash__(self):
    """Always raise TypeError: frames are not hashable."""
    message = "Frame objects are unhashable"
    raise TypeError(message)
class FrameOpt(Frame):
    """A frame with optional parts.

    Some ID3 frames have optional data; this class extends Frame to
    provide support for those parts.

    The optional parts are described, in order, by _optionalspec and
    follow the mandatory _framespec parts.  An optional attribute only
    exists on the frame when it was supplied; once one optional part
    is missing, the parts after it are ignored as well.
    """
    _optionalspec = []

    def __init__(self, *args, **kwargs):
        """Create the frame; optional parts are taken from kwargs only."""
        super(FrameOpt, self).__init__(*args, **kwargs)
        for spec in self._optionalspec:
            if spec.name in kwargs:
                validated = spec.validate(self, kwargs[spec.name])
                setattr(self, spec.name, validated)
            else: break

    def _readData(self, data):
        """Fill in mandatory and then optional attributes from data.

        Raises ID3JunkFrameError if the data runs out before every
        mandatory spec has been read, or if any spec fails with
        UnicodeDecodeError.  Optional specs are read only while data
        remains.  Anything left over other than NUL bytes triggers
        an ID3Warning.
        """
        odata = data
        for reader in self._framespec:
            if len(data):
                # Report undecodable text as a junk frame, exactly as
                # Frame._readData does.
                try: value, data = reader.read(self, data)
                except UnicodeDecodeError:
                    raise ID3JunkFrameError
            else: raise ID3JunkFrameError
            setattr(self, reader.name, value)
        if data:
            for reader in self._optionalspec:
                if len(data):
                    try: value, data = reader.read(self, data)
                    except UnicodeDecodeError:
                        raise ID3JunkFrameError
                else: break
                setattr(self, reader.name, value)
        if data.strip('\x00'):
            warn('Leftover data: %s: %r (from %r)' % (
                    type(self).__name__, data, odata),
                    ID3Warning)

    def _writeData(self):
        """Return the payload: mandatory parts, then the optional
        parts up to the first one that is not set on the frame."""
        data = []
        for writer in self._framespec:
            data.append(writer.write(self, getattr(self, writer.name)))
        for writer in self._optionalspec:
            try: data.append(writer.write(self, getattr(self, writer.name)))
            except AttributeError: break
        return ''.join(data)

    def __repr__(self):
        """Python representation of the frame, listing every mandatory
        attribute and each optional attribute that is set."""
        kw = []
        for attr in self._framespec:
            kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
        for attr in self._optionalspec:
            if hasattr(self, attr.name):
                kw.append('%s=%r' % (attr.name, getattr(self, attr.name)))
        return '%s(%s)' % (type(self).__name__, ', '.join(kw))
class TextFrame(Frame):
    """Text strings.

    Text frames support casts to unicode or str objects, as well as
    list-like indexing, extend, and append.

    Iterating over a TextFrame iterates over its strings, not its
    characters.

    Text frames have a 'text' attribute which is the list of strings,
    and an 'encoding' attribute; 0 for ISO-8859-1, 1 for UTF-16, 2 for
    UTF-16BE, and 3 for UTF-8. If you don't want to worry about
    encodings, just set it to 3.

    A text frame can be compared with a str (against its UTF-8
    encoded form), with a unicode object (against its strings joined
    by NUL characters), or with anything else (against the 'text'
    list itself).
    """

    _framespec = [ EncodingSpec('encoding'),
        MultiSpec('text', EncodedTextSpec('text'), sep=u'\u0000') ]

    def __str__(self): return self.__unicode__().encode('utf-8')
    def __unicode__(self): return u'\u0000'.join(self.text)

    def __eq__(self, other):
        if isinstance(other, str): return str(self) == other
        elif isinstance(other, unicode): return unicode(self) == other
        return self.text == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so without this method
        # a frame could be both == and != to the same value.
        return not self == other

    # Defining __eq__ must not make frames hashable.
    __hash__ = Frame.__hash__

    def __getitem__(self, item): return self.text[item]
    def __iter__(self): return iter(self.text)
    def append(self, value): return self.text.append(value)
    def extend(self, value): return self.text.extend(value)
    def _pprint(self): return " / ".join(self.text)
class NumericTextFrame(TextFrame):
    """Numerical text strings.

    The numeric value of these frames can be gotten with unary plus, e.g.
        frame = TLEN('12345')
        length = +frame
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedNumericTextSpec('text'), sep=u'\u0000'),
    ]

    def __pos__(self):
        """Return the numerical value of the string.

        Only the first string in 'text' is converted.
        """
        first = self.text[0]
        return int(first)
class NumericPartTextFrame(TextFrame):
    """Multivalue numerical text strings.

    These strings indicate 'part (e.g. track) X of Y', and unary plus
    returns the first value:
        frame = TRCK('4/15')
        track = +frame # track == 4
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', EncodedNumericPartTextSpec('text'), sep=u'\u0000'),
    ]

    def __pos__(self):
        """Return the number before the first '/' of the first string."""
        pieces = self.text[0].split("/")
        return int(pieces[0])
class TimeStampTextFrame(TextFrame):
    """A list of time stamps.

    The 'text' attribute in this frame is a list of ID3TimeStamp
    objects, not a list of strings.
    """

    _framespec = [
        EncodingSpec('encoding'),
        MultiSpec('text', TimeStampSpec('stamp'), sep=u','),
    ]

    def __str__(self):
        """Return the UTF-8 encoded form of __unicode__()."""
        return self.__unicode__().encode('utf-8')

    def __unicode__(self):
        """Return the text of every stamp, joined by commas."""
        texts = [entry.text for entry in self.text]
        return ','.join(texts)

    def _pprint(self):
        """Return the text of every stamp, joined by ' / '."""
        texts = [entry.text for entry in self.text]
        return " / ".join(texts)
class UrlFrame(Frame):
    """A frame containing a URL string.

    The ID3 specification is silent about IRIs and normalized URL
    forms. Mutagen assumes all URLs in files are encoded as Latin 1,
    but string conversion of this frame returns a UTF-8 representation
    for compatibility with other string conversions.

    The only sane way to handle URLs in MP3s is to restrict them to
    ASCII.

    A URL frame compares equal to whatever its 'url' attribute
    compares equal to.
    """

    _framespec = [ Latin1TextSpec('url') ]

    def __str__(self): return self.url.encode('utf-8')
    def __unicode__(self): return self.url

    def __eq__(self, other): return self.url == other

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so without this method
        # a frame could be both == and != to the same value.
        return not self == other

    # Defining __eq__ must not make frames hashable.
    __hash__ = Frame.__hash__

    def _pprint(self): return self.url
class UrlFrameU(UrlFrame):
    # The uniqueness key combines the frame ID with the URL, so
    # frames of the same type with different URLs get different keys.
    HashKey = property(
        lambda frame: '%s:%s' % (frame.FrameID, frame.url))
class TALB(TextFrame):
    """Album"""

class TBPM(NumericTextFrame):
    """Beats per minute"""

class TCOM(TextFrame):
    """Composer"""
class TCON(TextFrame):
    """Content type (Genre)

    ID3 has several ways genres can be represented; for convenience,
    use the 'genres' property rather than the 'text' attribute.
    """

    from mutagen._constants import GENRES

    def __get_genres(self):
        # Translate every entry of 'text' into genre names.  An entry
        # is either a bare number, "CR"/"RX", or a run of parenthesised
        # references optionally followed by a free-form name.
        import re
        pattern = re.compile(
            r"((?:\((?P<id>[0-9]+|RX|CR)\))*)(?P<str>.+)?")
        collected = []
        for value in self.text:
            if value.isdigit():
                # A bare index into the genre table.
                try: collected.append(self.GENRES[int(value)])
                except IndexError: collected.append(u"Unknown")
            elif value == "CR":
                collected.append(u"Cover")
            elif value == "RX":
                collected.append(u"Remix")
            elif value:
                parsed = []
                references, dummy, freeform = pattern.match(value).groups()

                if references:
                    # Drop the outermost parentheses, then split the
                    # "(a)(b)(c)" run into its individual references.
                    for ref in references[1:-1].split(")("):
                        if ref.isdigit() and int(ref) < len(self.GENRES):
                            parsed.append(unicode(self.GENRES[int(ref)]))
                        elif ref == "CR":
                            parsed.append(u"Cover")
                        elif ref == "RX":
                            parsed.append(u"Remix")
                        else:
                            parsed.append(u"Unknown")

                if freeform:
                    # "Unescaping" the first parenthesis
                    if freeform.startswith("(("):
                        freeform = freeform[1:]
                    if freeform not in parsed:
                        parsed.append(freeform)

                collected.extend(parsed)

        return collected

    def __set_genres(self, genres):
        # A single string is treated as a one-element list.
        if isinstance(genres, basestring):
            genres = [genres]
        self.text = [self.__decode(genre) for genre in genres]

    def __decode(self, value):
        # Byte strings are decoded with the codec registered for this
        # frame's encoding; anything else is passed through untouched.
        if not isinstance(value, str):
            return value
        codec = EncodedTextSpec._encodings[self.encoding][0]
        return value.decode(codec)

    genres = property(__get_genres, __set_genres, None,
                      "A list of genres parsed from the raw text data.")

    def _pprint(self):
        """Return the parsed genres joined by ' / '."""
        return " / ".join(self.genres)
class TCOP(TextFrame):
    """Copyright (c)"""

class TCMP(NumericTextFrame):
    """iTunes Compilation Flag"""

class TDAT(TextFrame):
    """Date of recording (DDMM)"""

class TDEN(TimeStampTextFrame):
    """Encoding Time"""

class TDOR(TimeStampTextFrame):
    """Original Release Time"""

class TDLY(NumericTextFrame):
    """Audio Delay (ms)"""

class TDRC(TimeStampTextFrame):
    """Recording Time"""

class TDRL(TimeStampTextFrame):
    """Release Time"""

class TDTG(TimeStampTextFrame):
    """Tagging Time"""

class TENC(TextFrame):
    """Encoder"""

class TEXT(TextFrame):
    """Lyricist"""

class TFLT(TextFrame):
    """File type"""

class TIME(TextFrame):
    """Time of recording (HHMM)"""

class TIT1(TextFrame):
    """Content group description"""

class TIT2(TextFrame):
    """Title"""

class TIT3(TextFrame):
    """Subtitle/Description refinement"""

class TKEY(TextFrame):
    """Starting Key"""

class TLAN(TextFrame):
    """Audio Languages"""

class TLEN(NumericTextFrame):
    """Audio Length (ms)"""

class TMED(TextFrame):
    """Source Media Type"""

class TMOO(TextFrame):
    """Mood"""

class TOAL(TextFrame):
    """Original Album"""

class TOFN(TextFrame):
    """Original Filename"""

class TOLY(TextFrame):
    """Original Lyricist"""

class TOPE(TextFrame):
    """Original Artist/Performer"""

class TORY(NumericTextFrame):
    """Original Release Year"""

class TOWN(TextFrame):
    """Owner/Licensee"""

class TPE1(TextFrame):
    """Lead Artist/Performer/Soloist/Group"""

class TPE2(TextFrame):
    """Band/Orchestra/Accompaniment"""

class TPE3(TextFrame):
    """Conductor"""

class TPE4(TextFrame):
    """Interpreter/Remixer/Modifier"""

class TPOS(NumericPartTextFrame):
    """Part of set"""

class TPRO(TextFrame):
    """Produced (P)"""

class TPUB(TextFrame):
    """Publisher"""

class TRCK(NumericPartTextFrame):
    """Track Number"""

class TRDA(TextFrame):
    """Recording Dates"""

class TRSN(TextFrame):
    """Internet Radio Station Name"""

class TRSO(TextFrame):
    """Internet Radio Station Owner"""

class TSIZ(NumericTextFrame):
    """Size of audio data (bytes)"""

class TSO2(TextFrame):
    """iTunes Album Artist Sort"""

class TSOA(TextFrame):
    """Album Sort Order key"""

class TSOC(TextFrame):
    """iTunes Composer Sort"""

class TSOP(TextFrame):
    """Performer Sort Order key"""

class TSOT(TextFrame):
    """Title Sort Order key"""

class TSRC(TextFrame):
    """International Standard Recording Code (ISRC)"""

class TSSE(TextFrame):
    """Encoder settings"""

class TSST(TextFrame):
    """Set Subtitle"""

class TYER(NumericTextFrame):
    """Year of recording"""
- class TXXX(TextFrame):
- """User-defined text data.
- TXXX f…