Source listing: examples/videx/vafromb.py from the echonest-remix project
(http://echo-nest-remix.googlecode.com/).
Python | 193 lines (181 code, 3 blank, 9 comment) | MD5 b465171b59eb38d4ca53b2132fa645f7
  1#!/usr/bin/env python
  2# encoding: utf=8
  3
  4"""
  5vafromb.py
  6
  7Re-synthesize video A using the segments of video B.
  8
  9By Ben Lacker, 2009-02-24.
 10"""
 11import numpy
 12import sys
 13import time
 14
 15from echonest import action, audio, video
 16
 17usage="""
 18Usage:
 19    python vafromb.py <inputfilenameA> <inputfilenameB> <outputfilename> <Mix> [env]
 20
 21Example:
 22    python vafromb.py BillieJean.mp4 CryMeARiver.mp4 BillieJeanFromCryMeARiver.mp4 0.9 env
 23
 24The 'env' flag applies the volume envelopes of the segments of A to those
 25from B.
 26
 27Mix is a number 0-1 that determines the relative mix of the resynthesized
 28song and the original input A. i.e. a mix value of 0.9 yields an output that
 29is mostly the resynthesized version.
 30
 31"""
 32
 33class AfromB(object):
 34    def __init__(self, input_filename_a, input_filename_b, output_filename):
 35        "Synchronizes slavebundle on masterbundle, writes to outbundle"
 36        self.master = video.loadav(input_filename_a)
 37        # convert slave so it matches master's settings
 38        converted = video.convertmov(input_filename_b, settings=self.master.video.settings)
 39        self.slave = video.loadav(converted)
 40        self.out = output_filename
 41        
 42        self.input_a = self.master.audio
 43        self.input_b = self.slave.audio
 44        self.segs_a = self.input_a.analysis.segments
 45        self.segs_b = self.input_b.analysis.segments
 46        self.output_filename = output_filename
 47    
 48    def calculate_distances(self, a):
 49        distance_matrix = numpy.zeros((len(self.segs_b), 4), dtype=numpy.float32)
 50        pitch_distances = []
 51        timbre_distances = []
 52        loudmax_distances = []
 53        for b in self.segs_b:
 54            pitch_diff = numpy.subtract(b.pitches,a.pitches)
 55            pitch_distances.append(numpy.sum(numpy.square(pitch_diff)))
 56            timbre_diff = numpy.subtract(b.timbre,a.timbre)
 57            timbre_distances.append(numpy.sum(numpy.square(timbre_diff)))
 58            loudmax_diff = b.loudness_begin - a.loudness_begin
 59            loudmax_distances.append(numpy.square(loudmax_diff))
 60        distance_matrix[:,0] = pitch_distances
 61        distance_matrix[:,1] = timbre_distances
 62        distance_matrix[:,2] = loudmax_distances
 63        distance_matrix[:,3] = range(len(self.segs_b))
 64        distance_matrix = self.normalize_distance_matrix(distance_matrix)
 65        return distance_matrix
 66    
 67    def normalize_distance_matrix(self, mat, mode='minmed'):
 68        """ Normalize a distance matrix on a per column basis.
 69        """
 70        if mode == 'minstd':
 71            mini = numpy.min(mat,0)
 72            m = numpy.subtract(mat, mini)
 73            std = numpy.std(mat,0)
 74            m = numpy.divide(m, std)
 75            m = numpy.divide(m, mat.shape[1])
 76        elif mode == 'minmed':
 77            mini = numpy.min(mat,0)
 78            m = numpy.subtract(mat, mini)
 79            med = numpy.median(m)
 80            m = numpy.divide(m, med)
 81            m = numpy.divide(m, mat.shape[1])
 82        elif mode == 'std':
 83            std = numpy.std(mat,0)
 84            m = numpy.divide(mat, std)
 85            m = numpy.divide(m, mat.shape[1])
 86        return m
 87    
 88    def run(self, mix=0.5, envelope=False):
 89        dur = len(self.input_a.data) + 100000 # another two seconds
 90        # determine shape of new array. 
 91        # do everything in mono; I'm not fancy.
 92        new_shape = (dur,)
 93        new_channels = 1
 94        self.input_a = action.make_mono(self.input_a)
 95        self.input_b = action.make_mono(self.input_b)
 96        out = audio.AudioData(shape=new_shape, sampleRate=self.input_b.sampleRate, numChannels=new_channels)
 97        for a in self.segs_a:
 98            seg_index = a.absolute_context()[0]
 99            # find best match from segs in B
100            distance_matrix = self.calculate_distances(a)
101            distances = [numpy.sqrt(x[0]+x[1]+x[2]) for x in distance_matrix]
102            match = self.segs_b[distances.index(min(distances))]
103            segment_data = self.input_b[match]
104            reference_data = self.input_a[a]
105            if segment_data.endindex < reference_data.endindex:
106                if new_channels > 1:
107                    silence_shape = (reference_data.endindex,new_channels)
108                else:
109                    silence_shape = (reference_data.endindex,)
110                new_segment = audio.AudioData(shape=silence_shape,
111                                        sampleRate=out.sampleRate,
112                                        numChannels=segment_data.numChannels)
113                new_segment.append(segment_data)
114                new_segment.endindex = len(new_segment)
115                segment_data = new_segment
116            elif segment_data.endindex > reference_data.endindex:
117                index = slice(0, int(reference_data.endindex), 1)
118                segment_data = audio.AudioData(None,segment_data.data[index],
119                                        sampleRate=segment_data.sampleRate)
120
121            chopvideo = self.slave.video[match] # get editableframes object
122            masterchop = self.master.video[a]
123            startframe = self.master.video.indexvoodo(a.start) # find start index
124            endframe = self.master.video.indexvoodo(a.start + a.duration)
125            for i in xrange(len(chopvideo.files)):
126                if startframe+i < len(self.master.video.files):
127                    self.master.video.files[startframe+i] = chopvideo.files[i]
128            last_frame = chopvideo.files[i]
129            for i in xrange(len(chopvideo.files), len(masterchop.files)):
130                if startframe+i < len(self.master.video.files):
131                    self.master.video.files[startframe+i] = last_frame
132                
133            if envelope:
134                # db -> voltage ratio http://www.mogami.com/e/cad/db.html
135                linear_max_volume = pow(10.0,a.loudness_max/20.0)
136                linear_start_volume = pow(10.0,a.loudness_begin/20.0)
137                if(seg_index == len(self.segs_a)-1): # if this is the last segment
138                    linear_next_start_volume = 0
139                else:
140                    linear_next_start_volume = pow(10.0,self.segs_a[seg_index+1].loudness_begin/20.0)
141                    pass
142                when_max_volume = a.time_loudness_max
143                # Count # of ticks I wait doing volume ramp so I can fix up rounding errors later.
144                ss = 0
145                # Set volume of this segment. Start at the start volume, ramp up to the max volume , then ramp back down to the next start volume.
146                cur_vol = float(linear_start_volume)
147                # Do the ramp up to max from start
148                samps_to_max_loudness_from_here = int(segment_data.sampleRate * when_max_volume)
149                if(samps_to_max_loudness_from_here > 0):
150                    how_much_volume_to_increase_per_samp = float(linear_max_volume - linear_start_volume)/float(samps_to_max_loudness_from_here)
151                    for samps in xrange(samps_to_max_loudness_from_here):
152                        try:
153                            segment_data.data[ss] *= cur_vol
154                        except IndexError:
155                            pass
156                        cur_vol = cur_vol + how_much_volume_to_increase_per_samp
157                        ss = ss + 1
158                # Now ramp down from max to start of next seg
159                samps_to_next_segment_from_here = int(segment_data.sampleRate * (a.duration-when_max_volume))
160                if(samps_to_next_segment_from_here > 0):
161                    how_much_volume_to_decrease_per_samp = float(linear_max_volume - linear_next_start_volume)/float(samps_to_next_segment_from_here)
162                    for samps in xrange(samps_to_next_segment_from_here):
163                        cur_vol = cur_vol - how_much_volume_to_decrease_per_samp
164                        try:
165                            segment_data.data[ss] *= cur_vol
166                        except IndexError:
167                            pass
168                        ss = ss + 1
169            mixed_data = audio.mix(segment_data,reference_data,mix=mix)
170            out.append(mixed_data)
171        self.master.audio = out
172        self.master.save(self.output_filename)
173
def main():
    """Parse command-line arguments and run the A-from-B resynthesis.

    argv: <inputfilenameA> <inputfilenameB> <outputfilename> <Mix> [env]
    On any parse failure (missing args, non-numeric Mix) prints usage and exits.
    """
    try:
        input_filename_a = sys.argv[1]
        input_filename_b = sys.argv[2]
        output_filename = sys.argv[3]
        # Bug fix: mix was passed on as a raw string; run()/audio.mix expects
        # a number 0-1 (see usage text). float() failure also prints usage.
        mix = float(sys.argv[4])
        # any fifth argument switches on envelope application
        env = len(sys.argv) == 6
    except Exception:
        print(usage)
        sys.exit(-1)
    AfromB(input_filename_a, input_filename_b, output_filename).run(mix=mix, envelope=env)
188
if __name__ == '__main__':
    # Time the whole run and report wall-clock duration.
    start = time.time()
    main()
    elapsed = time.time() - start
    print("Elapsed time: %.3f sec" % elapsed)