PageRenderTime 40ms CodeModel.GetById 18ms app.highlight 18ms RepoModel.GetById 2ms app.codeStats 0ms

/examples/afromb/afromb.py

http://echo-nest-remix.googlecode.com/
Python | 176 lines | 165 code | 2 blank | 9 comment | 0 complexity | 23e0cfdab2a38bdf5a7f84875b0181aa MD5 | raw file
  1#!/usr/bin/env python
  2# encoding: utf-8
  3
  4"""
  5afromb.py
  6
  7Re-synthesize song A using the segments of song B.
  8
  9By Ben Lacker, 2009-02-24.
 10"""
 11import numpy
 12import sys
 13import time
 14import echonest.audio as audio
 15
 # Command-line help text; main() prints it when argument parsing fails.
 16usage="""
 17Usage:
 18    python afromb.py <inputfilenameA> <inputfilenameB> <outputfilename> <Mix> [env]
 19
 20Example:
 21    python afromb.py BillieJean.mp3 CryMeARiver.mp3 BillieJeanFromCryMeARiver.mp3 0.9 env
 22
 23The 'env' flag applies the volume envelopes of the segments of A to those
 24from B.
 25
 26Mix is a number 0-1 that determines the relative mix of the resynthesized
 27song and the original input A. i.e. a mix value of 0.9 yields an output that
 28is mostly the resynthesized version.
 29
 30"""
 31
 32class AfromB(object):
 33    def __init__(self, input_filename_a, input_filename_b, output_filename):
 34        self.input_a = audio.LocalAudioFile(input_filename_a)
 35        self.input_b = audio.LocalAudioFile(input_filename_b)
 36        self.segs_a = self.input_a.analysis.segments
 37        self.segs_b = self.input_b.analysis.segments
 38        self.output_filename = output_filename
 39
 40    def calculate_distances(self, a):
 41        distance_matrix = numpy.zeros((len(self.segs_b), 4),
 42                                        dtype=numpy.float32)
 43        pitch_distances = []
 44        timbre_distances = []
 45        loudmax_distances = []
 46        for b in self.segs_b:
 47            pitch_diff = numpy.subtract(b.pitches,a.pitches)
 48            pitch_distances.append(numpy.sum(numpy.square(pitch_diff)))
 49            timbre_diff = numpy.subtract(b.timbre,a.timbre)
 50            timbre_distances.append(numpy.sum(numpy.square(timbre_diff)))
 51            loudmax_diff = b.loudness_begin - a.loudness_begin
 52            loudmax_distances.append(numpy.square(loudmax_diff))
 53        distance_matrix[:,0] = pitch_distances
 54        distance_matrix[:,1] = timbre_distances
 55        distance_matrix[:,2] = loudmax_distances
 56        distance_matrix[:,3] = range(len(self.segs_b))
 57        distance_matrix = self.normalize_distance_matrix(distance_matrix)
 58        return distance_matrix
 59
 60    def normalize_distance_matrix(self, mat, mode='minmed'):
 61        """ Normalize a distance matrix on a per column basis.
 62        """
 63        if mode == 'minstd':
 64            mini = numpy.min(mat,0)
 65            m = numpy.subtract(mat, mini)
 66            std = numpy.std(mat,0)
 67            m = numpy.divide(m, std)
 68            m = numpy.divide(m, mat.shape[1])
 69        elif mode == 'minmed':
 70            mini = numpy.min(mat,0)
 71            m = numpy.subtract(mat, mini)
 72            med = numpy.median(m)
 73            m = numpy.divide(m, med)
 74            m = numpy.divide(m, mat.shape[1])
 75        elif mode == 'std':
 76            std = numpy.std(mat,0)
 77            m = numpy.divide(mat, std)
 78            m = numpy.divide(m, mat.shape[1])
 79        return m
 80
 81    def run(self, mix=0.5, envelope=False):
 82        dur = len(self.input_a.data) + 100000 # another two seconds
 83        # determine shape of new array
 84        if len(self.input_a.data.shape) > 1:
 85            new_shape = (dur, self.input_a.data.shape[1])
 86            new_channels = self.input_a.data.shape[1]
 87        else:
 88            new_shape = (dur,)
 89            new_channels = 1
 90        out = audio.AudioData(shape=new_shape,
 91                            sampleRate=self.input_b.sampleRate,
 92                            numChannels=new_channels)
 93        for a in self.segs_a:
 94            seg_index = a.absolute_context()[0]
 95            # find best match from segs in B
 96            distance_matrix = self.calculate_distances(a)
 97            distances = [numpy.sqrt(x[0]+x[1]+x[2]) for x in distance_matrix]
 98            match = self.segs_b[distances.index(min(distances))]
 99            segment_data = self.input_b[match]
100            reference_data = self.input_a[a]
101            if segment_data.endindex < reference_data.endindex:
102                if new_channels > 1:
103                    silence_shape = (reference_data.endindex,new_channels)
104                else:
105                    silence_shape = (reference_data.endindex,)
106                new_segment = audio.AudioData(shape=silence_shape,
107                                        sampleRate=out.sampleRate,
108                                        numChannels=segment_data.numChannels)
109                new_segment.append(segment_data)
110                new_segment.endindex = len(new_segment)
111                segment_data = new_segment
112            elif segment_data.endindex > reference_data.endindex:
113                index = slice(0, int(reference_data.endindex), 1)
114                segment_data = audio.AudioData(None,segment_data.data[index],
115                                        sampleRate=segment_data.sampleRate)
116            if envelope:
117                # db -> voltage ratio http://www.mogami.com/e/cad/db.html
118                linear_max_volume = pow(10.0,a.loudness_max/20.0)
119                linear_start_volume = pow(10.0,a.loudness_begin/20.0)
120                if(seg_index == len(self.segs_a)-1): # if this is the last segment
121                    linear_next_start_volume = 0
122                else:
123                    linear_next_start_volume = pow(10.0,self.segs_a[seg_index+1].loudness_begin/20.0)
124                    pass
125                when_max_volume = a.time_loudness_max
126                # Count # of ticks I wait doing volume ramp so I can fix up rounding errors later.
127                ss = 0
128                # Set volume of this segment. Start at the start volume, ramp up to the max volume , then ramp back down to the next start volume.
129                cur_vol = float(linear_start_volume)
130                # Do the ramp up to max from start
131                samps_to_max_loudness_from_here = int(segment_data.sampleRate * when_max_volume)
132                if(samps_to_max_loudness_from_here > 0):
133                    how_much_volume_to_increase_per_samp = float(linear_max_volume - linear_start_volume)/float(samps_to_max_loudness_from_here)
134                    for samps in xrange(samps_to_max_loudness_from_here):
135                        try:
136                            segment_data.data[ss] *= cur_vol
137                        except IndexError:
138                            pass
139                        cur_vol = cur_vol + how_much_volume_to_increase_per_samp
140                        ss = ss + 1
141                # Now ramp down from max to start of next seg
142                samps_to_next_segment_from_here = int(segment_data.sampleRate * (a.duration-when_max_volume))
143                if(samps_to_next_segment_from_here > 0):
144                    how_much_volume_to_decrease_per_samp = float(linear_max_volume - linear_next_start_volume)/float(samps_to_next_segment_from_here)
145                    for samps in xrange(samps_to_next_segment_from_here):
146                        cur_vol = cur_vol - how_much_volume_to_decrease_per_samp
147                        try:
148                            segment_data.data[ss] *= cur_vol
149                        except IndexError:
150                            pass
151                        ss = ss + 1
152            mixed_data = audio.mix(segment_data,reference_data,mix=mix)
153            out.append(mixed_data)
154        out.encode(self.output_filename)
155
def main():
    """Parse the command line and run the A-from-B resynthesis.

    Expects `<inputA> <inputB> <output> <mix> [env]` in `sys.argv`.  When a
    required argument is missing or the mix is not a number, the usage text
    is printed and the process exits with status -1.  The envelope option is
    switched on whenever exactly one extra argument follows the mix value.
    """
    try:
        input_filename_a = sys.argv[1]
        input_filename_b = sys.argv[2]
        output_filename = sys.argv[3]
        # Convert here so a malformed mix is reported as a usage error
        # instead of reaching the audio code as a raw string.
        mix = float(sys.argv[4])
    except (IndexError, ValueError):
        print(usage)
        sys.exit(-1)
    env = len(sys.argv) == 6
    AfromB(input_filename_a, input_filename_b, output_filename).run(
        mix=mix, envelope=env)
171
if __name__ == '__main__':
    # Report the wall-clock time of the whole run once main() returns.
    tic = time.time()
    main()
    toc = time.time()
    # Parenthesized single-argument form prints the same on Python 2 and 3.
    print("Elapsed time: %.3f sec" % float(toc - tic))