/examples/capsule/capsule_support.py

http://echo-nest-remix.googlecode.com/ · Python · 368 lines · 235 code · 73 blank · 60 comment · 57 complexity · ef521368aa9481eb55b2176eb3c3ef85 MD5 · raw file

  1. #!/usr/bin/env python
  2. # encoding: utf=8
  3. """
  4. capsule_support.py
  5. Created by Tristan Jehan and Jason Sundram.
  6. """
  7. import numpy as np
  8. from copy import deepcopy
  9. from echonest.action import Crossfade, Playback, Crossmatch, Fadein, Fadeout, humanize_time
  10. from utils import rows, flatten
# constants for now
X_FADE = 3              # crossfade length, in seconds (used by make_crossfade)
FADE_IN = 0.25          # initial fade-in length, in seconds (used by initialize)
FADE_OUT = 6            # final fade-out length, in seconds
MIN_SEARCH = 4          # minimum marker count required to attempt beat alignment
MIN_MARKERS = 2         # minimal transition length, in markers
MIN_ALIGN_DURATION = 3  # minimal alignment window, in seconds
LOUDNESS_THRESH = -8    # reference loudness (dB) for gain equalization
FUSION_INTERVAL = .06 # this is what we use in the analyzer
AVG_PEAK_OFFSET = 0.025 # Estimated time between onset and peak of segment.
# TODO: this should probably be in actions?
def display_actions(actions):
    """Print a table of actions with their cumulative start times.

    Each line shows the humanized running time at which the action starts,
    followed by the action's own textual description.
    """
    total = 0  # running start time, in seconds, of the next action
    print
    for a in actions:
        print "%s\t %s" % (humanize_time(total), unicode(a))
        total += a.duration
    print
  29. def evaluate_distance(mat1, mat2):
  30. return np.linalg.norm(mat1.flatten() - mat2.flatten())
  31. def upsample_matrix(m):
  32. """ Upsample matrices by a factor of 2."""
  33. r, c = m.shape
  34. out = np.zeros((2*r, c), dtype=np.float32)
  35. for i in xrange(r):
  36. out[i*2 , :] = m[i, :]
  37. out[i*2+1, :] = m[i, :]
  38. return out
  39. def upsample_list(l, rate=2):
  40. """ Upsample lists by a factor of 2."""
  41. if rate != 2: return l[:]
  42. # Assume we're an AudioQuantumList.
  43. def split(x):
  44. a = deepcopy(x)
  45. a.duration = x.duration / 2
  46. b = deepcopy(a)
  47. b.start = x.start + a.duration
  48. return a, b
  49. return flatten(map(split, l))
  50. def average_duration(l):
  51. return sum([i.duration for i in l]) / float(len(l))
def align(track1, track2, mat1, mat2):
    """ Constrained search between a settled section and a new section.
    Outputs location in mat2 and the number of rows used in the transition.

    Returns (min_loc, rows1, rate1, rate2): min_loc is the best-matching row
    offset into mat2, rows1 the number of rows to cross-match, and rate1/rate2
    the upsampling factors (1 or 2) applied to mat1/mat2 respectively.
    """
    # Get the average marker duration.
    marker1 = average_duration(getattr(track1.analysis, track1.resampled['rate'])[track1.resampled['index']:track1.resampled['index']+rows(mat1)])
    marker2 = average_duration(getattr(track2.analysis, track2.resampled['rate'])[track2.resampled['index']:track2.resampled['index']+rows(mat2)])
    def get_adjustment(tr1, tr2):
        """Update tatum rate if necessary: if the average marker durations
        differ by more than half an octave, upsample the coarser track."""
        dist = np.log2(tr1 / tr2)
        if dist < -0.5: return (1, 2)
        elif dist > 0.5: return (2, 1)
        else: return (1, 1)
    rate1, rate2 = get_adjustment(marker1, marker2)
    if rate1 == 2: mat1 = upsample_matrix(mat1)
    if rate2 == 2: mat2 = upsample_matrix(mat2)
    # Update sizes.
    rows2 = rows(mat2)
    rows1 = min( rows(mat1), max(rows2 - MIN_SEARCH, MIN_MARKERS)) # at least the best of MIN_SEARCH choices
    # Search for minimum.
    def dist(i):
        # Distance between the head of mat1 and a rows1-row window of mat2.
        return evaluate_distance(mat1[0:rows1,:], mat2[i:i+rows1,:])
    min_loc = min(xrange(rows2 - rows1), key=dist)
    min_val = dist(min_loc)  # NOTE(review): computed but never used
    # Let's make sure track2 ends its transition on a regular tatum.
    if rate2 == 2 and (min_loc + rows1) & 1:
        rows1 -= 1
    return min_loc, rows1, rate1, rate2
  80. def equalize_tracks(tracks):
  81. def db_2_volume(loudness):
  82. return (1.0 - LOUDNESS_THRESH * (LOUDNESS_THRESH - loudness) / 100.0)
  83. for track in tracks:
  84. loudness = track.analysis.loudness
  85. track.gain = db_2_volume(loudness)
  86. def order_tracks(tracks):
  87. """ Finds the smoothest ordering between tracks, based on tempo only."""
  88. tempos = [track.analysis.tempo['value'] for track in tracks]
  89. median = np.median(tempos)
  90. def fold(t):
  91. q = np.log2(t / median)
  92. if q < -.5: return t * 2.0
  93. elif q > .5: return t / 2.0
  94. else: return t
  95. new_tempos = map(fold, tempos)
  96. order = np.argsort(new_tempos)
  97. return [tracks[i] for i in order]
  98. def is_valid(track, inter, transition):
  99. markers = getattr(track.analysis, track.resampled['rate'])
  100. if len(markers) < 1:
  101. dur = track.duration
  102. else:
  103. dur = markers[-1].start + markers[-1].duration - markers[0].start
  104. return inter + 2 * transition < dur
  105. def get_central(analysis, member='segments'):
  106. """ Returns a tuple:
  107. 1) copy of the members (e.g. segments) between end_of_fade_in and start_of_fade_out.
  108. 2) the index of the first retained member.
  109. """
  110. def central(s):
  111. return analysis.end_of_fade_in <= s.start and (s.start + s.duration) < analysis.start_of_fade_out
  112. members = getattr(analysis, member) # this is nicer than data.__dict__[member]
  113. ret = filter(central, members[:])
  114. index = members.index(ret[0]) if ret else 0
  115. return ret, index
  116. def get_mean_offset(segments, markers):
  117. if segments == markers:
  118. return 0
  119. index = 0
  120. offsets = []
  121. try:
  122. for marker in markers:
  123. while segments[index].start < marker.start + FUSION_INTERVAL:
  124. offset = abs(marker.start - segments[index].start)
  125. if offset < FUSION_INTERVAL:
  126. offsets.append(offset)
  127. index += 1
  128. except IndexError, e:
  129. pass
  130. return np.average(offsets) if offsets else AVG_PEAK_OFFSET
  131. def resample_features(data, rate='tatums', feature='timbre'):
  132. """
  133. Resample segment features to a given rate within fade boundaries.
  134. @param data: analysis object.
  135. @param rate: one of the following: segments, tatums, beats, bars.
  136. @param feature: either timbre or pitch.
  137. @return A dictionary including a numpy matrix of size len(rate) x 12, a rate, and an index
  138. """
  139. ret = {'rate': rate, 'index': 0, 'cursor': 0, 'matrix': np.zeros((1, 12), dtype=np.float32)}
  140. segments, ind = get_central(data.analysis, 'segments')
  141. markers, ret['index'] = get_central(data.analysis, rate)
  142. if len(segments) < 2 or len(markers) < 2:
  143. return ret
  144. # Find the optimal attack offset
  145. meanOffset = get_mean_offset(segments, markers)
  146. tmp_markers = deepcopy(markers)
  147. # Apply the offset
  148. for m in tmp_markers:
  149. m.start -= meanOffset
  150. if m.start < 0: m.start = 0
  151. # Allocate output matrix, give it alias mat for convenience.
  152. mat = ret['matrix'] = np.zeros((len(tmp_markers)-1, 12), dtype=np.float32)
  153. # Find the index of the segment that corresponds to the first marker
  154. f = lambda x: tmp_markers[0].start < x.start + x.duration
  155. index = (i for i,x in enumerate(segments) if f(x)).next()
  156. # Do the resampling
  157. try:
  158. for (i, m) in enumerate(tmp_markers):
  159. while segments[index].start + segments[index].duration < m.start + m.duration:
  160. dur = segments[index].duration
  161. if segments[index].start < m.start:
  162. dur -= m.start - segments[index].start
  163. C = min(dur / m.duration, 1)
  164. mat[i, 0:12] += C * np.array(getattr(segments[index], feature))
  165. index += 1
  166. C = min( (m.duration + m.start - segments[index].start) / m.duration, 1)
  167. mat[i, 0:12] += C * np.array(getattr(segments[index], feature))
  168. except IndexError, e:
  169. pass # avoid breaking with index > len(segments)
  170. return ret
  171. def column_whiten(mat):
  172. """ Zero mean, unit variance on a column basis"""
  173. m = mat - np.mean(mat,0)
  174. return m / np.std(m,0)
  175. def timbre_whiten(mat):
  176. if rows(mat) < 2: return mat
  177. m = np.zeros((rows(mat), 12), dtype=np.float32)
  178. m[:,0] = mat[:,0] - np.mean(mat[:,0],0)
  179. m[:,0] = m[:,0] / np.std(m[:,0],0)
  180. m[:,1:] = mat[:,1:] - np.mean(mat[:,1:].flatten(),0)
  181. m[:,1:] = m[:,1:] / np.std(m[:,1:].flatten(),0) # use this!
  182. return m
  183. def move_cursor(track, duration, cursor, buf=MIN_MARKERS):
  184. dur = 0
  185. while dur < duration and cursor < rows(track.resampled['matrix']) - buf:
  186. markers = getattr(track.analysis, track.resampled['rate'])
  187. dur += markers[track.resampled['index'] + cursor].duration
  188. cursor += 1
  189. return dur, cursor
  190. def get_mat_out(track, transition):
  191. """ Find and output the matrix to use in the next alignment.
  192. Assumes that track.resampled exists.
  193. """
  194. cursor = track.resampled['cursor']
  195. mat = track.resampled['matrix']
  196. # update cursor location to after the transition
  197. duration, cursor = move_cursor(track, transition, cursor)
  198. # output matrix with a proper number of rows, from beginning of transition
  199. return mat[track.resampled['cursor']:cursor,:]
  200. def get_mat_in(track, transition, inter):
  201. """ Find and output the search matrix to use in the next alignment.
  202. Assumes that track.resampled exists.
  203. """
  204. # search from the start
  205. cursor = 0
  206. track.resampled['cursor'] = cursor
  207. mat = track.resampled['matrix']
  208. # compute search zone by anticipating what's playing after the transition
  209. marker_end = getattr(track.analysis, track.resampled['rate'])[track.resampled['index'] + rows(mat)].start
  210. marker_start = getattr(track.analysis, track.resampled['rate'])[track.resampled['index']].start
  211. search_dur = (marker_end - marker_start) - inter - 2 * transition
  212. if search_dur < 0:
  213. return mat[:MIN_MARKERS,:]
  214. # find what the location is in rows
  215. duration, cursor = move_cursor(track, search_dur, cursor)
  216. return mat[:cursor,:]
def make_crossfade(track1, track2, inter):
    """Fallback transition: a plain X_FADE-second crossfade into track2,
    followed by roughly `inter` seconds of playback.

    Used when either track has too few markers for beat-aligned matching.
    Updates track2.resampled['cursor'] as a side effect.
    """
    markers1 = getattr(track1.analysis, track1.resampled['rate'])
    if len(markers1) < MIN_SEARCH:
        # NOTE(review): here the raw cursor value is used directly as a start
        # time, while the branch below treats it as a marker index — confirm
        # this asymmetry is intentional.
        start1 = track1.resampled['cursor']
    else:
        start1 = markers1[track1.resampled['index'] + track1.resampled['cursor']].start
    # Center the fade + inter window within track2, clamped at 0.
    start2 = max((track2.analysis.duration - (inter + 2 * X_FADE)) / 2, 0)
    markers2 = getattr(track2.analysis, track2.resampled['rate'])
    if len(markers2) < MIN_SEARCH:
        track2.resampled['cursor'] = start2 + X_FADE + inter
        dur = min(track2.analysis.duration - 2 * X_FADE, inter)
    else:
        duration, track2.resampled['cursor'] = move_cursor(track2, start2+X_FADE+inter, 0)
        dur = markers2[track2.resampled['index'] + track2.resampled['cursor']].start - X_FADE - start2
    xf = Crossfade((track1, track2), (start1, start2), X_FADE)
    pb = Playback(track2, start2 + X_FADE, dur)
    return [xf, pb]
  234. def make_crossmatch(track1, track2, rate1, rate2, loc2, rows):
  235. markers1 = upsample_list(getattr(track1.analysis, track1.resampled['rate']), rate1)
  236. markers2 = upsample_list(getattr(track2.analysis, track2.resampled['rate']), rate2)
  237. def to_tuples(l, i, n):
  238. return [(t.start, t.duration) for t in l[i : i + n]]
  239. start1 = rate1 * (track1.resampled['index'] + track1.resampled['cursor'])
  240. start2 = loc2 + rate2 * track2.resampled['index'] # loc2 has already been multiplied by rate2
  241. return Crossmatch((track1, track2), (to_tuples(markers1, start1, rows), to_tuples(markers2, start2, rows)))
  242. def make_transition(track1, track2, inter, transition):
  243. # the minimal transition is 2 markers
  244. # the minimal inter is 0 sec
  245. markers1 = getattr(track1.analysis, track1.resampled['rate'])
  246. markers2 = getattr(track2.analysis, track2.resampled['rate'])
  247. if len(markers1) < MIN_SEARCH or len(markers2) < MIN_SEARCH:
  248. return make_crossfade(track1, track2, inter)
  249. # though the minimal transition is 2 markers, the alignment is on at least 3 seconds
  250. mat1 = get_mat_out(track1, max(transition, MIN_ALIGN_DURATION))
  251. mat2 = get_mat_in(track2, max(transition, MIN_ALIGN_DURATION), inter)
  252. try:
  253. loc, n, rate1, rate2 = align(track1, track2, mat1, mat2)
  254. except:
  255. return make_crossfade(track1, track2, inter)
  256. if transition < MIN_ALIGN_DURATION:
  257. duration, cursor = move_cursor(track2, transition, loc)
  258. n = max(cursor-loc, MIN_MARKERS)
  259. xm = make_crossmatch(track1, track2, rate1, rate2, loc, n)
  260. # loc and n are both in terms of potentially upsampled data.
  261. # Divide by rate here to get end_crossmatch in terms of the original data.
  262. end_crossmatch = (loc + n) / rate2
  263. if markers2[-1].start < markers2[end_crossmatch].start + inter + transition:
  264. inter = max(markers2[-1].start - transition, 0)
  265. # move_cursor sets the cursor properly for subsequent operations, and gives us duration.
  266. dur, track2.resampled['cursor'] = move_cursor(track2, inter, end_crossmatch)
  267. pb = Playback(track2, sum(xm.l2[-1]), dur)
  268. return [xm, pb]
  269. def initialize(track, inter, transition):
  270. """find initial cursor location"""
  271. mat = track.resampled['matrix']
  272. markers = getattr(track.analysis, track.resampled['rate'])
  273. try:
  274. # compute duration of matrix
  275. mat_dur = markers[track.resampled['index'] + rows(mat)].start - markers[track.resampled['index']].start
  276. start = (mat_dur - inter - transition - FADE_IN) / 2
  277. dur = start + FADE_IN + inter
  278. # move cursor to transition marker
  279. duration, track.resampled['cursor'] = move_cursor(track, dur, 0)
  280. # work backwards to find the exact locations of initial fade in and playback sections
  281. fi = Fadein(track, markers[track.resampled['index'] + track.resampled['cursor']].start - inter - FADE_IN, FADE_IN)
  282. pb = Playback(track, markers[track.resampled['index'] + track.resampled['cursor']].start - inter, inter)
  283. except:
  284. track.resampled['cursor'] = FADE_IN + inter
  285. fi = Fadein(track, 0, FADE_IN)
  286. pb = Playback(track, FADE_IN, inter)
  287. return [fi, pb]
  288. def terminate(track, fade):
  289. """ Deal with last fade out"""
  290. cursor = track.resampled['cursor']
  291. markers = getattr(track.analysis, track.resampled['rate'])
  292. if MIN_SEARCH <= len(markers):
  293. cursor = markers[track.resampled['index'] + cursor].start
  294. return [Fadeout(track, cursor, min(fade, track.duration-cursor))]