/examples/videx/vafromb.py

http://echo-nest-remix.googlecode.com/ · Python

#!/usr/bin/env python
# encoding: utf=8
"""
vafromb.py

Re-synthesize video A using the segments of video B.

By Ben Lacker, 2009-02-24.
"""
import numpy
import sys
import time

from echonest import action, audio, video

usage = """
Usage:
    python vafromb.py <inputfilenameA> <inputfilenameB> <outputfilename> <Mix> [env]

Example:
    python vafromb.py BillieJean.mp4 CryMeARiver.mp4 BillieJeanFromCryMeARiver.mp4 0.9 env

The 'env' flag applies the volume envelopes of the segments of A to those
from B.

Mix is a number 0-1 that determines the relative mix of the resynthesized
song and the original input A. i.e. a mix value of 0.9 yields an output that
is mostly the resynthesized version.
"""
class AfromB(object):
    def __init__(self, input_filename_a, input_filename_b, output_filename):
        "Synchronizes slavebundle on masterbundle, writes to outbundle"
        self.master = video.loadav(input_filename_a)
        # convert slave so it matches master's settings
        converted = video.convertmov(input_filename_b, settings=self.master.video.settings)
        self.slave = video.loadav(converted)
        self.out = output_filename
        self.input_a = self.master.audio
        self.input_b = self.slave.audio
        self.segs_a = self.input_a.analysis.segments
        self.segs_b = self.input_b.analysis.segments
        self.output_filename = output_filename
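
    # calculate_distances(): for one segment `a` of A, build a matrix with one
    # row per segment of B. Columns 0-2 are squared distances from `a` in pitch,
    # timbre, and onset-loudness space; column 3 records the B segment's index.
    # run() collapses columns 0-2 into a single combined distance (sqrt of their
    # sum) and picks the closest B segment.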
    def calculate_distances(self, a):
        distance_matrix = numpy.zeros((len(self.segs_b), 4), dtype=numpy.float32)
        pitch_distances = []
        timbre_distances = []
        loudmax_distances = []
        for b in self.segs_b:
            pitch_diff = numpy.subtract(b.pitches, a.pitches)
            pitch_distances.append(numpy.sum(numpy.square(pitch_diff)))
            timbre_diff = numpy.subtract(b.timbre, a.timbre)
            timbre_distances.append(numpy.sum(numpy.square(timbre_diff)))
            loudmax_diff = b.loudness_begin - a.loudness_begin
            loudmax_distances.append(numpy.square(loudmax_diff))
        distance_matrix[:, 0] = pitch_distances
        distance_matrix[:, 1] = timbre_distances
        distance_matrix[:, 2] = loudmax_distances
        distance_matrix[:, 3] = range(len(self.segs_b))
        distance_matrix = self.normalize_distance_matrix(distance_matrix)
        return distance_matrix
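
    # normalize_distance_matrix(): with the default 'minmed' mode each column is
    # shifted so its minimum is zero, then the whole matrix is divided by its
    # overall median and by the number of columns; 'minstd' does the same shift
    # but divides by per-column standard deviation, and 'std' divides by the
    # per-column standard deviation alone.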
    def normalize_distance_matrix(self, mat, mode='minmed'):
        """ Normalize a distance matrix on a per column basis.
        """
        if mode == 'minstd':
            mini = numpy.min(mat, 0)
            m = numpy.subtract(mat, mini)
            std = numpy.std(mat, 0)
            m = numpy.divide(m, std)
            m = numpy.divide(m, mat.shape[1])
        elif mode == 'minmed':
            mini = numpy.min(mat, 0)
            m = numpy.subtract(mat, mini)
            med = numpy.median(m)
            m = numpy.divide(m, med)
            m = numpy.divide(m, mat.shape[1])
        elif mode == 'std':
            std = numpy.std(mat, 0)
            m = numpy.divide(mat, std)
            m = numpy.divide(m, mat.shape[1])
        return m

    def run(self, mix=0.5, envelope=False):
        dur = len(self.input_a.data) + 100000  # another two seconds
        # determine shape of new array.
        # do everything in mono; I'm not fancy.
        new_shape = (dur,)
        new_channels = 1
        self.input_a = action.make_mono(self.input_a)
        self.input_b = action.make_mono(self.input_b)
        out = audio.AudioData(shape=new_shape, sampleRate=self.input_b.sampleRate, numChannels=new_channels)
        for a in self.segs_a:
            seg_index = a.absolute_context()[0]
            # find best match from segs in B
            distance_matrix = self.calculate_distances(a)
            distances = [numpy.sqrt(x[0] + x[1] + x[2]) for x in distance_matrix]
            match = self.segs_b[distances.index(min(distances))]
            segment_data = self.input_b[match]
            reference_data = self.input_a[a]
            if segment_data.endindex < reference_data.endindex:
                # B's segment is shorter than A's: pad it with silence to A's length
                if new_channels > 1:
                    silence_shape = (reference_data.endindex, new_channels)
                else:
                    silence_shape = (reference_data.endindex,)
                new_segment = audio.AudioData(shape=silence_shape,
                                              sampleRate=out.sampleRate,
                                              numChannels=segment_data.numChannels)
                new_segment.append(segment_data)
                new_segment.endindex = len(new_segment)
                segment_data = new_segment
            elif segment_data.endindex > reference_data.endindex:
                # B's segment is longer than A's: truncate it to A's length
                index = slice(0, int(reference_data.endindex), 1)
                segment_data = audio.AudioData(None, segment_data.data[index],
                                               sampleRate=segment_data.sampleRate)
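
            # Swap in the matching video frames from B for this segment's time
            # span; if B's clip has fewer frames than A's slot, the second loop
            # below holds B's last frame until the slot is filled.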
            chopvideo = self.slave.video[match]  # get editableframes object
            masterchop = self.master.video[a]
            startframe = self.master.video.indexvoodo(a.start)  # find start index
            endframe = self.master.video.indexvoodo(a.start + a.duration)
            for i in xrange(len(chopvideo.files)):
                if startframe + i < len(self.master.video.files):
                    self.master.video.files[startframe + i] = chopvideo.files[i]
            last_frame = chopvideo.files[i]
            for i in xrange(len(chopvideo.files), len(masterchop.files)):
                if startframe + i < len(self.master.video.files):
                    self.master.video.files[startframe + i] = last_frame
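
            # Envelope option: the analysis loudness values are in dB, and
            # pow(10.0, dB / 20.0) converts them to linear amplitude ratios
            # (0 dB -> 1.0, -6 dB -> roughly 0.5). The ramps below scale each
            # sample so B's segment follows A's envelope: up from A's start
            # loudness to its peak, then back down toward the next segment's start.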
            if envelope:
                # db -> voltage ratio http://www.mogami.com/e/cad/db.html
                linear_max_volume = pow(10.0, a.loudness_max / 20.0)
                linear_start_volume = pow(10.0, a.loudness_begin / 20.0)
                if seg_index == len(self.segs_a) - 1:  # if this is the last segment
                    linear_next_start_volume = 0
                else:
                    linear_next_start_volume = pow(10.0, self.segs_a[seg_index + 1].loudness_begin / 20.0)
                when_max_volume = a.time_loudness_max
                # Count # of ticks I wait doing volume ramp so I can fix up rounding errors later.
                ss = 0
                # Set volume of this segment. Start at the start volume, ramp up to
                # the max volume, then ramp back down to the next start volume.
                cur_vol = float(linear_start_volume)
                # Do the ramp up to max from start
                samps_to_max_loudness_from_here = int(segment_data.sampleRate * when_max_volume)
                if samps_to_max_loudness_from_here > 0:
                    how_much_volume_to_increase_per_samp = float(linear_max_volume - linear_start_volume) / float(samps_to_max_loudness_from_here)
                    for samps in xrange(samps_to_max_loudness_from_here):
                        try:
                            segment_data.data[ss] *= cur_vol
                        except IndexError:
                            pass
                        cur_vol = cur_vol + how_much_volume_to_increase_per_samp
                        ss = ss + 1
                # Now ramp down from max to start of next seg
                samps_to_next_segment_from_here = int(segment_data.sampleRate * (a.duration - when_max_volume))
                if samps_to_next_segment_from_here > 0:
                    how_much_volume_to_decrease_per_samp = float(linear_max_volume - linear_next_start_volume) / float(samps_to_next_segment_from_here)
                    for samps in xrange(samps_to_next_segment_from_here):
                        cur_vol = cur_vol - how_much_volume_to_decrease_per_samp
                        try:
                            segment_data.data[ss] *= cur_vol
                        except IndexError:
                            pass
                        ss = ss + 1
            mixed_data = audio.mix(segment_data, reference_data, mix=mix)
            out.append(mixed_data)
        self.master.audio = out
        self.master.save(self.output_filename)


def main():
    try:
        input_filename_a = sys.argv[1]
        input_filename_b = sys.argv[2]
        output_filename = sys.argv[3]
        mix = float(sys.argv[4])  # parse the 0-1 mix value as a number
        if len(sys.argv) == 6:
            env = True
        else:
            env = False
    except Exception:
        print usage
        sys.exit(-1)
    AfromB(input_filename_a, input_filename_b, output_filename).run(mix=mix, envelope=env)


if __name__ == '__main__':
    tic = time.time()
    main()
    toc = time.time()
    print "Elapsed time: %.3f sec" % float(toc - tic)