/examples/afromb/afromb.py

http://echo-nest-remix.googlecode.com/ · Python · 176 lines · 150 code · 8 blank · 18 comment · 28 complexity · 23e0cfdab2a38bdf5a7f84875b0181aa MD5

#!/usr/bin/env python
# encoding: utf-8
"""
afromb.py

Re-synthesize song A using the segments of song B.

By Ben Lacker, 2009-02-24.
"""
import numpy
import sys
import time

import echonest.audio as audio

usage = """
Usage:
python afromb.py <inputfilenameA> <inputfilenameB> <outputfilename> <Mix> [env]

Example:
python afromb.py BillieJean.mp3 CryMeARiver.mp3 BillieJeanFromCryMeARiver.mp3 0.9 env

The 'env' flag applies the volume envelopes of the segments of A to those
from B.

Mix is a number 0-1 that determines the relative mix of the resynthesized
song and the original input A; i.e., a mix value of 0.9 yields an output that
is mostly the resynthesized version.
"""
class AfromB(object):

    def __init__(self, input_filename_a, input_filename_b, output_filename):
        self.input_a = audio.LocalAudioFile(input_filename_a)
        self.input_b = audio.LocalAudioFile(input_filename_b)
        self.segs_a = self.input_a.analysis.segments
        self.segs_b = self.input_b.analysis.segments
        self.output_filename = output_filename
    def calculate_distances(self, a):
        distance_matrix = numpy.zeros((len(self.segs_b), 4),
                                      dtype=numpy.float32)
        pitch_distances = []
        timbre_distances = []
        loudmax_distances = []
        for b in self.segs_b:
            pitch_diff = numpy.subtract(b.pitches, a.pitches)
            pitch_distances.append(numpy.sum(numpy.square(pitch_diff)))
            timbre_diff = numpy.subtract(b.timbre, a.timbre)
            timbre_distances.append(numpy.sum(numpy.square(timbre_diff)))
            loudmax_diff = b.loudness_begin - a.loudness_begin
            loudmax_distances.append(numpy.square(loudmax_diff))
        distance_matrix[:, 0] = pitch_distances
        distance_matrix[:, 1] = timbre_distances
        distance_matrix[:, 2] = loudmax_distances
        distance_matrix[:, 3] = range(len(self.segs_b))
        distance_matrix = self.normalize_distance_matrix(distance_matrix)
        return distance_matrix
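    # Rough sketch of the per-candidate distance above, spelled out for one
    # segment b of B (illustrative only, not part of the original code):
    #
    #   pitch_d  = sum((bp - ap) ** 2 for bp, ap in zip(b.pitches, a.pitches))
    #   timbre_d = sum((bt - at) ** 2 for bt, at in zip(b.timbre, a.timbre))
    #   loud_d   = (b.loudness_begin - a.loudness_begin) ** 2
    #
    # run() later combines the three normalized columns as
    # sqrt(pitch_d + timbre_d + loud_d) and picks the segment of B with the
    # smallest value; the fourth column only carries the segment index.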
    def normalize_distance_matrix(self, mat, mode='minmed'):
        """Normalize a distance matrix on a per-column basis."""
        if mode == 'minstd':
            mini = numpy.min(mat, 0)
            m = numpy.subtract(mat, mini)
            std = numpy.std(mat, 0)
            m = numpy.divide(m, std)
            m = numpy.divide(m, mat.shape[1])
        elif mode == 'minmed':
            mini = numpy.min(mat, 0)
            m = numpy.subtract(mat, mini)
            med = numpy.median(m)
            m = numpy.divide(m, med)
            m = numpy.divide(m, mat.shape[1])
        elif mode == 'std':
            std = numpy.std(mat, 0)
            m = numpy.divide(mat, std)
            m = numpy.divide(m, mat.shape[1])
        return m
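    # With the default 'minmed' mode: shift each column so its minimum is 0,
    # divide by the median of the shifted matrix, then divide by the number of
    # columns. The intent appears to be keeping any one feature (pitch, timbre,
    # loudness) from dominating the combined distance in run().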
    def run(self, mix=0.5, envelope=False):
        # Size the output to hold all of input A plus 100,000 extra samples
        # (roughly two seconds of headroom at 44.1 kHz).
        dur = len(self.input_a.data) + 100000
        # Determine the shape of the new array (mono vs. multi-channel).
        if len(self.input_a.data.shape) > 1:
            new_shape = (dur, self.input_a.data.shape[1])
            new_channels = self.input_a.data.shape[1]
        else:
            new_shape = (dur,)
            new_channels = 1
        # Note: the output takes B's sample rate but A's length and channel
        # count, so A and B are assumed to share a sample rate.
        out = audio.AudioData(shape=new_shape,
                              sampleRate=self.input_b.sampleRate,
                              numChannels=new_channels)
        for a in self.segs_a:
            seg_index = a.absolute_context()[0]
            # Find the best-matching segment in B.
            distance_matrix = self.calculate_distances(a)
            distances = [numpy.sqrt(x[0] + x[1] + x[2]) for x in distance_matrix]
            match = self.segs_b[distances.index(min(distances))]
            segment_data = self.input_b[match]
            reference_data = self.input_a[a]
            if segment_data.endindex < reference_data.endindex:
                # The matched B segment is shorter than the A segment: pad it
                # out with silence to the reference length.
                if new_channels > 1:
                    silence_shape = (reference_data.endindex, new_channels)
                else:
                    silence_shape = (reference_data.endindex,)
                new_segment = audio.AudioData(shape=silence_shape,
                                              sampleRate=out.sampleRate,
                                              numChannels=segment_data.numChannels)
                new_segment.append(segment_data)
                new_segment.endindex = len(new_segment)
                segment_data = new_segment
            elif segment_data.endindex > reference_data.endindex:
                # The matched B segment is longer: truncate it to the
                # reference length.
                index = slice(0, int(reference_data.endindex), 1)
                segment_data = audio.AudioData(None, segment_data.data[index],
                                               sampleRate=segment_data.sampleRate)
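            # At this point the matched B segment has been padded with silence
            # or truncated so that it lines up with the reference segment from
            # A before the two are mixed below.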
            if envelope:
                # Convert dB loudness to a linear amplitude (voltage) ratio:
                # linear = 10 ** (dB / 20). See http://www.mogami.com/e/cad/db.html
                linear_max_volume = pow(10.0, a.loudness_max / 20.0)
                linear_start_volume = pow(10.0, a.loudness_begin / 20.0)
                if seg_index == len(self.segs_a) - 1:  # last segment of A
                    linear_next_start_volume = 0
                else:
                    linear_next_start_volume = pow(10.0, self.segs_a[seg_index + 1].loudness_begin / 20.0)
                when_max_volume = a.time_loudness_max
                # ss walks through segment_data sample by sample as the two
                # volume ramps below are applied.
                ss = 0
                # Shape this segment's volume: start at the start volume, ramp
                # up to the max volume, then ramp back down to the next
                # segment's start volume.
                cur_vol = float(linear_start_volume)
                # Ramp up from the start volume to the max volume.
                samps_to_max_loudness_from_here = int(segment_data.sampleRate * when_max_volume)
                if samps_to_max_loudness_from_here > 0:
                    how_much_volume_to_increase_per_samp = float(linear_max_volume - linear_start_volume) / float(samps_to_max_loudness_from_here)
                    for samps in xrange(samps_to_max_loudness_from_here):
                        try:
                            segment_data.data[ss] *= cur_vol
                        except IndexError:
                            pass
                        cur_vol = cur_vol + how_much_volume_to_increase_per_samp
                        ss = ss + 1
                # Ramp down from the max volume to the start of the next segment.
                samps_to_next_segment_from_here = int(segment_data.sampleRate * (a.duration - when_max_volume))
                if samps_to_next_segment_from_here > 0:
                    how_much_volume_to_decrease_per_samp = float(linear_max_volume - linear_next_start_volume) / float(samps_to_next_segment_from_here)
                    for samps in xrange(samps_to_next_segment_from_here):
                        cur_vol = cur_vol - how_much_volume_to_decrease_per_samp
                        try:
                            segment_data.data[ss] *= cur_vol
                        except IndexError:
                            pass
                        ss = ss + 1
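            # Crossfade the matched B segment with the original A segment. Per
            # the usage note above, 'mix' weights the resynthesized (B-derived)
            # data and (1 - mix) weights the original A data.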
            mixed_data = audio.mix(segment_data, reference_data, mix=mix)
            out.append(mixed_data)
        out.encode(self.output_filename)
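# Programmatic use is a one-liner (a sketch mirroring main() below; it assumes
# echonest.audio is configured with a valid Echo Nest API key):
#
#   AfromB('BillieJean.mp3', 'CryMeARiver.mp3', 'output.mp3').run(mix=0.9,
#                                                                 envelope=True)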
def main():
    try:
        input_filename_a = sys.argv[1]
        input_filename_b = sys.argv[2]
        output_filename = sys.argv[3]
        mix = float(sys.argv[4])  # mix arrives as a string from argv
        if len(sys.argv) == 6:
            env = True
        else:
            env = False
    except:
        print usage
        sys.exit(-1)
    AfromB(input_filename_a, input_filename_b, output_filename).run(mix=mix,
                                                                    envelope=env)
if __name__ == '__main__':
    tic = time.time()
    main()
    toc = time.time()
    print "Elapsed time: %.3f sec" % (toc - tic)