
/examples/capsule/capsule_support.py

http://echo-nest-remix.googlecode.com/
#!/usr/bin/env python
# encoding: utf-8

"""
capsule_support.py

Created by Tristan Jehan and Jason Sundram.
"""

import numpy as np
from copy import deepcopy
from echonest.action import Crossfade, Playback, Crossmatch, Fadein, Fadeout, humanize_time
from utils import rows, flatten

# constants for now
X_FADE = 3
FADE_IN = 0.25
FADE_OUT = 6
MIN_SEARCH = 4
MIN_MARKERS = 2
MIN_ALIGN_DURATION = 3
LOUDNESS_THRESH = -8
FUSION_INTERVAL = .06   # this is what we use in the analyzer
AVG_PEAK_OFFSET = 0.025 # Estimated time between onset and peak of segment.

# TODO: this should probably be in actions?
def display_actions(actions):
    total = 0
    print
    for a in actions:
        print "%s\t  %s" % (humanize_time(total), unicode(a))
        total += a.duration
    print

def evaluate_distance(mat1, mat2):
    """ Euclidean distance between two equally-shaped feature matrices."""
    return np.linalg.norm(mat1.flatten() - mat2.flatten())

def upsample_matrix(m):
    """ Upsample matrices by a factor of 2."""
    r, c = m.shape
    out = np.zeros((2*r, c), dtype=np.float32)
    for i in xrange(r):
        out[i*2  , :] = m[i, :]
        out[i*2+1, :] = m[i, :]
    return out

def upsample_list(l, rate=2):
    """ Upsample lists by a factor of 2."""
    if rate != 2: return l[:]
    # Assume we're an AudioQuantumList.
    def split(x):
        a = deepcopy(x)
        a.duration = x.duration / 2
        b = deepcopy(a)
        b.start = x.start + a.duration
        return a, b

    return flatten(map(split, l))
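# Example (illustrative, not part of the original module): upsample_list()
# halves each quantum, so a marker with start=10.0 and duration=0.5 becomes
# (start=10.0, duration=0.25) followed by (start=10.25, duration=0.25); the
# total duration is preserved while the marker rate doubles to match an
# upsampled feature matrix.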

def average_duration(l):
    return sum([i.duration for i in l]) / float(len(l))

def align(track1, track2, mat1, mat2):
    """ Constrained search between a settled section and a new section.
        Outputs location in mat2 and the number of rows used in the transition.
    """
    # Get the average marker duration.
    marker1 = average_duration(getattr(track1.analysis, track1.resampled['rate'])[track1.resampled['index']:track1.resampled['index']+rows(mat1)])
    marker2 = average_duration(getattr(track2.analysis, track2.resampled['rate'])[track2.resampled['index']:track2.resampled['index']+rows(mat2)])

    def get_adjustment(tr1, tr2):
        """Update tatum rate if necessary"""
        dist = np.log2(tr1 / tr2)
        if  dist < -0.5: return (1, 2)
        elif dist > 0.5: return (2, 1)
        else:            return (1, 1)

    rate1, rate2 = get_adjustment(marker1, marker2)
    if rate1 == 2: mat1 = upsample_matrix(mat1)
    if rate2 == 2: mat2 = upsample_matrix(mat2)

    # Update sizes.
    rows2 = rows(mat2)
    rows1 = min(rows(mat1), max(rows2 - MIN_SEARCH, MIN_MARKERS)) # at least the best of MIN_SEARCH choices

    # Search for minimum.
    def dist(i):
        return evaluate_distance(mat1[0:rows1,:], mat2[i:i+rows1,:])

    min_loc = min(xrange(rows2 - rows1), key=dist)
    min_val = dist(min_loc)

    # Let's make sure track2 ends its transition on a regular tatum.
    if rate2 == 2 and (min_loc + rows1) & 1:
        rows1 -= 1

    return min_loc, rows1, rate1, rate2
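# Illustrative sketch (not part of the original module): align() slides the
# first rows1 rows of the outgoing matrix across every offset of the incoming
# matrix and keeps the offset with the smallest Euclidean timbre distance.
# A hypothetical call from a transition routine looks like:
#
#   loc, n, rate1, rate2 = align(track1, track2, mat_out, mat_in)
#   # loc: row offset into track2's (possibly upsampled) matrix
#   # n: number of rows to crossmatch
#   # rate1, rate2: 1 or 2, depending on whether either matrix was upsampled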

def equalize_tracks(tracks):
    """ Set a gain on each track, based on its overall loudness analysis."""

    def db_2_volume(loudness):
        return (1.0 - LOUDNESS_THRESH * (LOUDNESS_THRESH - loudness) / 100.0)

    for track in tracks:
        loudness = track.analysis.loudness
        track.gain = db_2_volume(loudness)
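# Worked example (illustrative): with LOUDNESS_THRESH = -8, db_2_volume
# reduces to 0.36 - 0.08 * loudness, so a track analyzed at -8 dB keeps a gain
# of 1.0, a quieter track at -14 dB gets 1.48, and a louder track at -2 dB
# gets 0.52: quieter tracks are boosted, louder ones attenuated.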

def order_tracks(tracks):
    """ Finds the smoothest ordering between tracks, based on tempo only."""
    tempos = [track.analysis.tempo['value'] for track in tracks]
    median = np.median(tempos)
    def fold(t):
        q = np.log2(t / median)
        if  q < -.5: return t * 2.0
        elif q > .5: return t / 2.0
        else:        return t

    new_tempos = map(fold, tempos)
    order = np.argsort(new_tempos)
    return [tracks[i] for i in order]
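# Worked example (illustrative): with tempos of [80, 170, 128] BPM the median
# is 128; fold() doubles 80 to 160 (more than half an octave below the median)
# and leaves 170 and 128 alone, so the resulting order is 128, 80, 170 --
# tempos are compared modulo the octave before sorting.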

def is_valid(track, inter, transition):
    """ Check that a track is long enough for the inter playback plus two transitions."""
    markers = getattr(track.analysis, track.resampled['rate'])
    if len(markers) < 1:
        dur = track.duration
    else:
        dur = markers[-1].start + markers[-1].duration - markers[0].start
    return inter + 2 * transition < dur

def get_central(analysis, member='segments'):
    """ Returns a tuple:
        1) copy of the members (e.g. segments) between end_of_fade_in and start_of_fade_out.
        2) the index of the first retained member.
    """
    def central(s):
        return analysis.end_of_fade_in <= s.start and (s.start + s.duration) < analysis.start_of_fade_out

    members = getattr(analysis, member) # this is nicer than data.__dict__[member]
    ret = filter(central, members[:])
    index = members.index(ret[0]) if ret else 0

    return ret, index

def get_mean_offset(segments, markers):
    """ Mean absolute offset between marker starts and nearby segment onsets.
        Falls back to AVG_PEAK_OFFSET when no segment lies within FUSION_INTERVAL of a marker.
    """
    if segments == markers:
        return 0

    index = 0
    offsets = []
    try:
        for marker in markers:
            while segments[index].start < marker.start + FUSION_INTERVAL:
                offset = abs(marker.start - segments[index].start)
                if offset < FUSION_INTERVAL:
                    offsets.append(offset)
                index += 1
    except IndexError:
        pass

    return np.average(offsets) if offsets else AVG_PEAK_OFFSET

def resample_features(data, rate='tatums', feature='timbre'):
    """
    Resample segment features to a given rate within fade boundaries.
    @param data: track object (its analysis attribute is used).
    @param rate: one of the following: segments, tatums, beats, bars.
    @param feature: either timbre or pitch.
    @return A dictionary including a numpy matrix of size len(rate) x 12, a rate, and an index.
    """
    ret = {'rate': rate, 'index': 0, 'cursor': 0, 'matrix': np.zeros((1, 12), dtype=np.float32)}
    segments, ind = get_central(data.analysis, 'segments')
    markers, ret['index'] = get_central(data.analysis, rate)

    if len(segments) < 2 or len(markers) < 2:
        return ret

    # Find the optimal attack offset
    meanOffset = get_mean_offset(segments, markers)
    tmp_markers = deepcopy(markers)

    # Apply the offset
    for m in tmp_markers:
        m.start -= meanOffset
        if m.start < 0: m.start = 0

    # Allocate output matrix, give it alias mat for convenience.
    mat = ret['matrix'] = np.zeros((len(tmp_markers)-1, 12), dtype=np.float32)

    # Find the index of the segment that corresponds to the first marker
    f = lambda x: tmp_markers[0].start < x.start + x.duration
    index = (i for i,x in enumerate(segments) if f(x)).next()

    # Do the resampling
    try:
        for (i, m) in enumerate(tmp_markers):
            while segments[index].start + segments[index].duration < m.start + m.duration:
                dur = segments[index].duration
                if segments[index].start < m.start:
                    dur -= m.start - segments[index].start

                C = min(dur / m.duration, 1)

                mat[i, 0:12] += C * np.array(getattr(segments[index], feature))
                index += 1

            C = min((m.duration + m.start - segments[index].start) / m.duration, 1)
            mat[i, 0:12] += C * np.array(getattr(segments[index], feature))
    except IndexError:
        pass # avoid breaking with index > len(segments)

    return ret
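# Note (illustrative): each row of ret['matrix'] is a 12-dimensional feature
# vector for one marker (tatum, beat, or bar), built as a duration-weighted
# sum of the segment features overlapping that marker; ret['index'] records
# where the first retained marker sits in the full marker list, so matrix rows
# can be mapped back to absolute time.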

def column_whiten(mat):
    """ Zero mean, unit variance on a column basis."""
    m = mat - np.mean(mat,0)
    return m / np.std(m,0)

def timbre_whiten(mat):
    if rows(mat) < 2: return mat
    m = np.zeros((rows(mat), 12), dtype=np.float32)
    m[:,0] = mat[:,0] - np.mean(mat[:,0],0)
    m[:,0] = m[:,0] / np.std(m[:,0],0)
    m[:,1:] = mat[:,1:] - np.mean(mat[:,1:].flatten(),0)
    m[:,1:] = m[:,1:] / np.std(m[:,1:].flatten(),0) # use this!
    return m
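# Design note (illustrative): timbre_whiten standardizes the first timbre
# coefficient (roughly overall loudness) on its own, while coefficients 1-11
# share a single mean and standard deviation so their relative scales are
# preserved; column_whiten, by contrast, standardizes every column
# independently.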

def move_cursor(track, duration, cursor, buf=MIN_MARKERS):
    """ Advance the cursor, marker by marker, until about `duration` seconds
        have accumulated, keeping at least `buf` rows in reserve.
        Returns the accumulated duration and the new cursor.
    """
    dur = 0
    markers = getattr(track.analysis, track.resampled['rate'])
    while dur < duration and cursor < rows(track.resampled['matrix']) - buf:
        dur += markers[track.resampled['index'] + cursor].duration
        cursor += 1
    return dur, cursor

def get_mat_out(track, transition):
    """ Find and output the matrix to use in the next alignment.
        Assumes that track.resampled exists.
    """
    cursor = track.resampled['cursor']
    mat = track.resampled['matrix']
    # update cursor location to after the transition
    duration, cursor = move_cursor(track, transition, cursor)
    # output matrix with a proper number of rows, from beginning of transition
    return mat[track.resampled['cursor']:cursor,:]

def get_mat_in(track, transition, inter):
    """ Find and output the search matrix to use in the next alignment.
        Assumes that track.resampled exists.
    """
    # search from the start
    cursor = 0
    track.resampled['cursor'] = cursor
    mat = track.resampled['matrix']

    # compute search zone by anticipating what's playing after the transition
    marker_end = getattr(track.analysis, track.resampled['rate'])[track.resampled['index'] + rows(mat)].start
    marker_start = getattr(track.analysis, track.resampled['rate'])[track.resampled['index']].start
    search_dur = (marker_end - marker_start) - inter - 2 * transition

    if search_dur < 0:
        return mat[:MIN_MARKERS,:]

    # find what the location is in rows
    duration, cursor = move_cursor(track, search_dur, cursor)

    return mat[:cursor,:]

def make_crossfade(track1, track2, inter):
    """ Fallback transition: a simple X_FADE-second crossfade from track1 into
        track2, followed by a stretch of track2 playback. Used when a track has
        too few markers for beat alignment (or when alignment fails).
    """
    markers1 = getattr(track1.analysis, track1.resampled['rate'])

    if len(markers1) < MIN_SEARCH:
        start1 = track1.resampled['cursor']
    else:
        start1 = markers1[track1.resampled['index'] + track1.resampled['cursor']].start

    start2 = max((track2.analysis.duration - (inter + 2 * X_FADE)) / 2, 0)
    markers2 = getattr(track2.analysis, track2.resampled['rate'])

    if len(markers2) < MIN_SEARCH:
        track2.resampled['cursor'] = start2 + X_FADE + inter
        dur = min(track2.analysis.duration - 2 * X_FADE, inter)
    else:
        duration, track2.resampled['cursor'] = move_cursor(track2, start2+X_FADE+inter, 0)
        dur = markers2[track2.resampled['index'] + track2.resampled['cursor']].start - X_FADE - start2

    xf = Crossfade((track1, track2), (start1, start2), X_FADE)
    pb = Playback(track2, start2 + X_FADE, dur)

    return [xf, pb]

def make_crossmatch(track1, track2, rate1, rate2, loc2, rows):
    """ Build the beat-matched Crossmatch action over `rows` markers, starting
        at track1's current cursor and at location loc2 in track2.
    """
    markers1 = upsample_list(getattr(track1.analysis, track1.resampled['rate']), rate1)
    markers2 = upsample_list(getattr(track2.analysis, track2.resampled['rate']), rate2)

    def to_tuples(l, i, n):
        return [(t.start, t.duration) for t in l[i : i + n]]

    start1 = rate1 * (track1.resampled['index'] + track1.resampled['cursor'])
    start2 = loc2 + rate2 * track2.resampled['index'] # loc2 has already been multiplied by rate2

    return Crossmatch((track1, track2), (to_tuples(markers1, start1, rows), to_tuples(markers2, start2, rows)))

def make_transition(track1, track2, inter, transition):
    # the minimal transition is 2 markers
    # the minimal inter is 0 sec
    markers1 = getattr(track1.analysis, track1.resampled['rate'])
    markers2 = getattr(track2.analysis, track2.resampled['rate'])

    if len(markers1) < MIN_SEARCH or len(markers2) < MIN_SEARCH:
        return make_crossfade(track1, track2, inter)

    # though the minimal transition is 2 markers, the alignment is on at least 3 seconds
    mat1 = get_mat_out(track1, max(transition, MIN_ALIGN_DURATION))
    mat2 = get_mat_in(track2, max(transition, MIN_ALIGN_DURATION), inter)

    try:
        loc, n, rate1, rate2 = align(track1, track2, mat1, mat2)
    except:
        return make_crossfade(track1, track2, inter)

    if transition < MIN_ALIGN_DURATION:
        duration, cursor = move_cursor(track2, transition, loc)
        n = max(cursor-loc, MIN_MARKERS)

    xm = make_crossmatch(track1, track2, rate1, rate2, loc, n)
    # loc and n are both in terms of potentially upsampled data.
    # Divide by rate here to get end_crossmatch in terms of the original data.
    end_crossmatch = (loc + n) / rate2

    if markers2[-1].start < markers2[end_crossmatch].start + inter + transition:
        inter = max(markers2[-1].start - transition, 0)

    # move_cursor sets the cursor properly for subsequent operations, and gives us duration.
    dur, track2.resampled['cursor'] = move_cursor(track2, inter, end_crossmatch)
    pb = Playback(track2, sum(xm.l2[-1]), dur)

    return [xm, pb]
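# Summary (illustrative): make_transition falls back to a plain crossfade when
# either track has fewer than MIN_SEARCH markers or when align() raises;
# otherwise it builds a beat-matched Crossmatch over n markers, then appends a
# Playback of track2 that runs until the next transition point.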

def initialize(track, inter, transition):
    """ Find the initial cursor location and return the opening Fadein and Playback actions."""
    mat = track.resampled['matrix']
    markers = getattr(track.analysis, track.resampled['rate'])

    try:
        # compute duration of matrix
        mat_dur = markers[track.resampled['index'] + rows(mat)].start - markers[track.resampled['index']].start
        start = (mat_dur - inter - transition - FADE_IN) / 2
        dur = start + FADE_IN + inter
        # move cursor to transition marker
        duration, track.resampled['cursor'] = move_cursor(track, dur, 0)
        # work backwards to find the exact locations of initial fade in and playback sections
        fi = Fadein(track, markers[track.resampled['index'] + track.resampled['cursor']].start - inter - FADE_IN, FADE_IN)
        pb = Playback(track, markers[track.resampled['index'] + track.resampled['cursor']].start - inter, inter)
    except:
        track.resampled['cursor'] = FADE_IN + inter
        fi = Fadein(track, 0, FADE_IN)
        pb = Playback(track, FADE_IN, inter)

    return [fi, pb]

def terminate(track, fade):
    """ Deal with the last fade out."""
    cursor = track.resampled['cursor']
    markers = getattr(track.analysis, track.resampled['rate'])
    if MIN_SEARCH <= len(markers):
        cursor = markers[track.resampled['index'] + cursor].start
    return [Fadeout(track, cursor, min(fade, track.duration-cursor))]
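
# Illustrative usage sketch (editorial, not part of the original module):
# a capsule driver along the lines of examples/capsule/capsule.py would wire
# these helpers together roughly as follows; `tracks`, `inter`, `transition`,
# and the 'beats' rate are assumptions made for the sake of the example.
#
#   tracks = order_tracks(tracks)
#   equalize_tracks(tracks)
#   for track in tracks:
#       track.resampled = resample_features(track, rate='beats')
#       track.resampled['matrix'] = timbre_whiten(track.resampled['matrix'])
#   tracks = [t for t in tracks if is_valid(t, inter, transition)]
#
#   actions = initialize(tracks[0], inter, transition)
#   for track1, track2 in zip(tracks[:-1], tracks[1:]):
#       actions += make_transition(track1, track2, inter, transition)
#   actions += terminate(tracks[-1], FADE_OUT)
#   display_actions(actions)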