/src/echonest/support/midi/MidiFileParser.py

http://echo-nest-remix.googlecode.com/ · Python · 192 lines · 89 code · 48 blank · 55 comment · 12 complexity · dee64d7e6c0ecadbe5681b7d18f1125b MD5 · raw file

  1. # -*- coding: ISO-8859-1 -*-
  2. # std library
  3. from struct import unpack
  4. # uhh I don't really like this, but there are so many constants to
  5. # import otherwise
  6. from constants import *
  7. from EventDispatcher import EventDispatcher
  8. class MidiFileParser:
  9. """
  10. The MidiFileParser is the lowest level parser that see the data as
  11. midi data. It generates events that gets triggered on the outstream.
  12. """
  13. def __init__(self, raw_in, outstream):
  14. """
  15. raw_data is the raw content of a midi file as a string.
  16. """
  17. # internal values, don't mess with 'em directly
  18. self.raw_in = raw_in
  19. self.dispatch = EventDispatcher(outstream)
  20. # Used to keep track of stuff
  21. self._running_status = None
  22. def parseMThdChunk(self):
  23. "Parses the header chunk"
  24. raw_in = self.raw_in
  25. header_chunk_type = raw_in.nextSlice(4)
  26. header_chunk_zise = raw_in.readBew(4)
  27. # check if it is a proper midi file
  28. if header_chunk_type != 'MThd':
  29. raise TypeError, "It is not a valid midi file!"
  30. # Header values are at fixed locations, so no reason to be clever
  31. self.format = raw_in.readBew(2)
  32. self.nTracks = raw_in.readBew(2)
  33. self.division = raw_in.readBew(2)
  34. # Theoretically a header larger than 6 bytes can exist
  35. # but no one has seen one in the wild
  36. # But correctly ignore unknown data if it is though
  37. if header_chunk_zise > 6:
  38. raw_in.moveCursor(header_chunk_zise-6)
  39. # call the header event handler on the stream
  40. self.dispatch.header(self.format, self.nTracks, self.division)
  41. def parseMTrkChunk(self):
  42. "Parses a track chunk. This is the most important part of the parser."
  43. # set time to 0 at start of a track
  44. self.dispatch.reset_time()
  45. dispatch = self.dispatch
  46. raw_in = self.raw_in
  47. # Trigger event at the start of a track
  48. dispatch.start_of_track(self._current_track)
  49. # position cursor after track header
  50. raw_in.moveCursor(4)
  51. # unsigned long is 4 bytes
  52. tracklength = raw_in.readBew(4)
  53. track_endposition = raw_in.getCursor() + tracklength # absolute position!
  54. while raw_in.getCursor() < track_endposition:
  55. # find relative time of the event
  56. time = raw_in.readVarLen()
  57. dispatch.update_time(time)
  58. # be aware of running status!!!!
  59. peak_ahead = raw_in.readBew(move_cursor=0)
  60. if (peak_ahead & 0x80):
  61. # the status byte has the high bit set, so it
  62. # was not running data but proper status byte
  63. status = self._running_status = raw_in.readBew()
  64. else:
  65. # use that darn running status
  66. status = self._running_status
  67. # could it be illegal data ?? Do we need to test for that?
  68. # I need more example midi files to be shure.
  69. # Also, while I am almost certain that no realtime
  70. # messages will pop up in a midi file, I might need to
  71. # change my mind later.
  72. # we need to look at nibbles here
  73. hi_nible, lo_nible = status & 0xF0, status & 0x0F
  74. # match up with events
  75. # Is it a meta_event ??
  76. # these only exists in midi files, not in transmitted midi data
  77. # In transmitted data META_EVENT (0xFF) is a system reset
  78. if status == META_EVENT:
  79. meta_type = raw_in.readBew()
  80. meta_length = raw_in.readVarLen()
  81. meta_data = raw_in.nextSlice(meta_length)
  82. dispatch.meta_event(meta_type, meta_data)
  83. # Is it a sysex_event ??
  84. elif status == SYSTEM_EXCLUSIVE:
  85. # ignore sysex events
  86. sysex_length = raw_in.readVarLen()
  87. # don't read sysex terminator
  88. sysex_data = raw_in.nextSlice(sysex_length-1)
  89. # only read last data byte if it is a sysex terminator
  90. # It should allways be there, but better safe than sorry
  91. if raw_in.readBew(move_cursor=0) == END_OFF_EXCLUSIVE:
  92. eo_sysex = raw_in.readBew()
  93. dispatch.sysex_event(sysex_data)
  94. # the sysex code has not been properly tested, and might be fishy!
  95. # is it a system common event?
  96. elif hi_nible == 0xF0: # Hi bits are set then
  97. data_sizes = {
  98. MTC:1,
  99. SONG_POSITION_POINTER:2,
  100. SONG_SELECT:1,
  101. }
  102. data_size = data_sizes.get(hi_nible, 0)
  103. common_data = raw_in.nextSlice(data_size)
  104. common_type = lo_nible
  105. dispatch.system_common(common_type, common_data)
  106. # Oh! Then it must be a midi event (channel voice message)
  107. else:
  108. data_sizes = {
  109. PATCH_CHANGE:1,
  110. CHANNEL_PRESSURE:1,
  111. NOTE_OFF:2,
  112. NOTE_ON:2,
  113. AFTERTOUCH:2,
  114. CONTINUOUS_CONTROLLER:2,
  115. PITCH_BEND:2,
  116. }
  117. data_size = data_sizes.get(hi_nible, 0)
  118. channel_data = raw_in.nextSlice(data_size)
  119. event_type, channel = hi_nible, lo_nible
  120. dispatch.channel_messages(event_type, channel, channel_data)
  121. def parseMTrkChunks(self):
  122. "Parses all track chunks."
  123. for t in range(self.nTracks):
  124. self._current_track = t
  125. self.parseMTrkChunk() # this is where it's at!
  126. self.dispatch.eof()
  127. if __name__ == '__main__':
  128. # get data
  129. test_file = 'test/midifiles/minimal.mid'
  130. test_file = 'test/midifiles/cubase-minimal.mid'
  131. test_file = 'test/midifiles/Lola.mid'
  132. # f = open(test_file, 'rb')
  133. # raw_data = f.read()
  134. # f.close()
  135. #
  136. #
  137. # # do parsing
  138. from MidiToText import MidiToText
  139. from RawInstreamFile import RawInstreamFile
  140. midi_in = MidiFileParser(RawInstreamFile(test_file), MidiToText())
  141. midi_in.parseMThdChunk()
  142. midi_in.parseMTrkChunks()