
/src/echonest/audio.py

http://echo-nest-remix.googlecode.com/
Possible License(s): BSD-3-Clause
  1. """
  2. The main `Echo Nest`_ `Remix API`_ module for manipulating audio files and
  3. their associated `Echo Nest`_ `Analyze API`_ analyses.
  4. AudioData, and getpieces by Robert Ochshorn
  5. on 2008-06-06. Some refactoring and everything else by Joshua Lifton
  6. 2008-09-07. Refactoring by Ben Lacker 2009-02-11. Other contributions
  7. by Adam Lindsay.
  8. :group Base Classes: AudioAnalysis, AudioRenderable, AudioData, AudioData32
  9. :group Audio-plus-Analysis Classes: AudioFile, LocalAudioFile, LocalAnalysis
  10. :group Building Blocks: AudioQuantum, AudioSegment, AudioQuantumList, ModifiedRenderable
  11. :group Effects: AudioEffect, LevelDB, AmplitudeFactor, TimeTruncateFactor, TimeTruncateLength, Simultaneous
  12. :group Exception Classes: FileTypeError, EchoNestRemixError
  13. :group Audio helper functions: getpieces, mix, assemble, megamix
  14. :group ffmpeg helper functions: ffmpeg, settings_from_ffmpeg, ffmpeg_error_check
  15. :group Utility functions: chain_from_mixed, _dataParser, _attributeParser, _segmentsParser
  16. .. _Analyze API: http://developer.echonest.com/pages/overview?version=2
  17. .. _Remix API: http://code.google.com/p/echo-nest-remix/
  18. .. _Echo Nest: http://the.echonest.com/
  19. """
__version__ = "$Revision: 0 $"
# $Source$

import hashlib
import numpy
import os
import sys
import StringIO
import struct
import subprocess
import tempfile
import wave

from pyechonest import track
import pyechonest.util
import echonest.selection as selection
import pyechonest.config as config
#from echonest.support import stupidxml
import xml.etree.ElementTree as etree
import xml.dom.minidom as minidom
import weakref


class AudioAnalysis(object):
    """
    This class uses (but does not wrap) `pyechonest.track` to allow
    transparent caching of the audio analysis of an audio file.

    For example, the following script will display the bars of a track
    twice::

        from echonest import *
        a = audio.AudioAnalysis('YOUR_TRACK_ID_HERE')
        a.bars
        a.bars

    The first time `a.bars` is called, a network request is made of the
    `Echo Nest`_ `Analyze API`_. The second time `a.bars` is called, the
    cached value is returned immediately.

    An `AudioAnalysis` object can be created using an existing ID, as in
    the example above, or by specifying the audio file to upload in
    order to create the ID, as in::

        a = audio.AudioAnalysis('FULL_PATH_TO_AUDIO_FILE')

    .. _Analyze API: http://developer.echonest.com/pages/overview?version=2
    .. _Echo Nest: http://the.echonest.com/
    """
    def __init__(self, path_or_identifier):
        """
        Constructor. If the argument is a valid local path or a URL,
        the track ID is generated by uploading the file to the `Echo Nest`_
        `Analyze API`_\. Otherwise, the argument is assumed to be
        the track ID.

        :param path_or_identifier: A string representing either a path to a local
                                   file, or the ID of a file that has already
                                   been uploaded for analysis.

        .. _Analyze API: http://developer.echonest.com/docs/v4/track.html
        .. _Echo Nest: http://the.echonest.com/
        """
        if type(path_or_identifier) is not str:
            # Argument is invalid.
            raise TypeError("Argument 'path_or_identifier' must be a string \
                            representing either a filename, track ID, or MD5.")

        # Start with no track, so an unrecognized identifier falls through
        # to the EchoNestRemixError below rather than an AttributeError.
        self.pyechonest_track = None

        # see if path_or_identifier is a path or an ID
        if os.path.isfile(path_or_identifier):
            # it's a filename
            self.pyechonest_track = track.track_from_filename(path_or_identifier)
        else:
            if path_or_identifier.startswith('music://') or \
               (path_or_identifier.startswith('TR') and \
                len(path_or_identifier) == 18):
                # it's an id
                self.pyechonest_track = track.track_from_id(path_or_identifier)
            elif len(path_or_identifier) == 32:
                # it's an md5
                self.pyechonest_track = track.track_from_md5(path_or_identifier)

        if self.pyechonest_track is None:
            raise EchoNestRemixError('Could not find track %s' % path_or_identifier)

        self.source = None
        self._bars = None
        self._beats = None
        self._tatums = None
        self._sections = None
        self._segments = None

        self.identifier = self.pyechonest_track.id
        self.metadata = self.pyechonest_track.meta

        for attribute in ('time_signature', 'mode', 'tempo', 'key'):
            d = {}
            d['value'] = getattr(self.pyechonest_track, attribute)
            d['confidence'] = getattr(self.pyechonest_track, attribute + '_confidence')
            setattr(self, attribute, d)

        for attribute in ('end_of_fade_in', 'start_of_fade_out', 'duration', 'loudness'):
            setattr(self, attribute, getattr(self.pyechonest_track, attribute))

    @property
    def bars(self):
        if self._bars is None:
            self._bars = _dataParser('bar', self.pyechonest_track.bars)
            self._bars.attach(self)
        return self._bars

    @property
    def beats(self):
        if self._beats is None:
            self._beats = _dataParser('beat', self.pyechonest_track.beats)
            self._beats.attach(self)
        return self._beats

    @property
    def tatums(self):
        if self._tatums is None:
            self._tatums = _dataParser('tatum', self.pyechonest_track.tatums)
            self._tatums.attach(self)
        return self._tatums

    @property
    def sections(self):
        if self._sections is None:
            self._sections = _attributeParser('section', self.pyechonest_track.sections)
            self._sections.attach(self)
        return self._sections

    @property
    def segments(self):
        if self._segments is None:
            self._segments = _segmentsParser(self.pyechonest_track.segments)
            self._segments.attach(self)
        return self._segments

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        del dictclone['source']
        return dictclone

    def __setstate__(self, state):
        """
        Recreates circular references after unpickling.
        """
        self.__dict__.update(state)
        if hasattr(AudioAnalysis, 'CACHED_VARIABLES'):
            for cached_var in AudioAnalysis.CACHED_VARIABLES:
                if type(object.__getattribute__(self, cached_var)) == AudioQuantumList:
                    object.__getattribute__(self, cached_var).attach(self)
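

# A minimal sketch (not part of the original API) of the lazy caching the
# class docstring above describes. The track ID is a placeholder, and a
# pyechonest API key is assumed to be configured; nothing runs at import time.
def _example_cached_bars(identifier='TRXXXXXXXXXXXXXXXX'):
    """Illustrative only: the second `.bars` access skips the network."""
    a = AudioAnalysis(identifier)
    first = a.bars    # network request, parsed by _dataParser('bar', ...)
    second = a.bars   # served from the cached self._bars
    assert first is second
    return first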


class AudioRenderable(object):
    """
    An object that gives an `AudioData` in response to a call to its `render`\()
    method. Intended to be an abstract class that helps enforce the
    `AudioRenderable` protocol. It also provides a couple of convenience
    methods common to many descendants.

    Every `AudioRenderable` must provide three things:

    render()
        A method returning the `AudioData` for the object. The rhythmic duration (point
        at which any following audio is appended) is signified by the `endindex` accessor,
        measured in samples.
    source
        An accessor pointing to the `AudioData` that contains the original sample data of
        (a superset of) this audio object.
    duration
        An accessor returning the rhythmic duration (in seconds) of the audio object.
    """
    def resolve_source(self, alt):
        """
        Given an alternative, fallback `alt` source, return either `self`'s
        source or the alternative. Throw an informative error if no source
        is found.

        Utility code that was being replicated in several places, so it
        ended up here. Not necessary for use in the RenderableAudioObject
        protocol.
        """
        if hasattr(self, 'source'):
            source = self.source
        else:
            if isinstance(alt, AudioData):
                source = alt
            else:
                print >> sys.stderr, self.__repr__()
                raise EchoNestRemixError("%s has no implicit or explicit source \
                                          during rendering." %
                                         (self.__class__.__name__, ))
        return source

    @staticmethod
    def init_audio_data(source, num_samples):
        """
        Convenience function for rendering: return a pre-allocated, zeroed
        `AudioData`.
        """
        if source.numChannels > 1:
            newchans = source.numChannels
            newshape = (num_samples, newchans)
        else:
            newchans = 1
            newshape = (num_samples,)
        return AudioData32(shape=newshape, sampleRate=source.sampleRate,
                           numChannels=newchans, defer=False)

    def sources(self):
        return set([self.source])

    def encode(self, filename):
        """
        Shortcut function that takes care of the need to obtain an `AudioData`
        object first, through `render`.
        """
        self.render().encode(filename)


class AudioData(AudioRenderable):
    """
    Handles audio data transparently. A smart audio container
    with accessors that include:

    sampleRate
        samples per second
    numChannels
        number of channels
    data
        a `numpy.array`_

    .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
    """
    def __init__(self, filename=None, ndarray=None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Given an input `ndarray`, import the sample values and shape
        (if none is specified) of the input `numpy.array`.

        Given a `filename` (and no input ndarray), use ffmpeg to convert
        the file to wave, then load the file into the data,
        auto-detecting the sample rate and number of channels.

        :param filename: a path to an audio file for loading its sample
                         data into the AudioData.data
        :param ndarray: a `numpy.array`_ instance with sample data
        :param shape: a tuple of array dimensions
        :param sampleRate: sample rate, in Hz
        :param numChannels: number of channels

        .. _numpy.array: http://docs.scipy.org/doc/numpy/reference/generated/numpy.array.html
        """
        self.verbose = verbose
        if (filename is not None) and (ndarray is None):
            if sampleRate is None or numChannels is None:
                # default to 44100 Hz, stereo, then probe the file with ffmpeg
                sampleRate, numChannels = 44100, 2
                parsestring = ffmpeg(filename, overwrite=False, verbose=self.verbose)
                ffmpeg_error_check(parsestring[1])
                sampleRate, numChannels = settings_from_ffmpeg(parsestring[1])
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        self.convertedfile = None
        self.endindex = 0
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int16)
        elif not self.defer and self.filename:
            self.data = None
            self.load()
        else:
            self.data = None
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        if isinstance(self.data, numpy.ndarray):
            return
        temp_file_handle = None
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            file_to_read = self.filename
        elif self.convertedfile:
            file_to_read = self.convertedfile
        else:
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            result = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                            numChannels=self.numChannels, sampleRate=self.sampleRate,
                            verbose=self.verbose)
            ffmpeg_error_check(result[1])
            file_to_read = self.convertedfile

        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        sampleSize = numFrames * self.numChannels
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            ndarray.resize((numFrames, self.numChannels))
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int16)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def __getitem__(self, index):
        """
        Fetches a frame or slice. Returns an individual frame (if the index
        is a time offset float or an integer sample number) or a slice if
        the index is an `AudioQuantum` (or quacks like one).
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, float):
            index = int(index * self.sampleRate)
        elif hasattr(index, "start") and hasattr(index, "duration"):
            index = slice(float(index.start), index.start + index.duration)

        if isinstance(index, slice):
            if (hasattr(index.start, "start") and
                hasattr(index.stop, "duration") and
                hasattr(index.stop, "start")):
                index = slice(index.start.start, index.stop.start + index.stop.duration)

        if isinstance(index, slice):
            return self.getslice(index)
        else:
            return self.getsample(index)

    def getslice(self, index):
        "Help `__getitem__` return a new AudioData for a given slice"
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index.start, float):
            index = slice(int(index.start * self.sampleRate),
                          int(index.stop * self.sampleRate), index.step)
        return AudioData(None, self.data[index], sampleRate=self.sampleRate,
                         numChannels=self.numChannels, defer=False)

    def getsample(self, index):
        """
        Help `__getitem__` return a frame (all channels for a given
        sample index)
        """
        if not isinstance(self.data, numpy.ndarray) and self.defer:
            self.load()
        if isinstance(index, int):
            return self.data[index]
        else:
            #let the numpy array interface be clever
            return AudioData(None, self.data[index], defer=False)

    def pad_with_zeros(self, num_samples):
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int16), axis=0)

    def append(self, another_audio_data):
        "Appends the input to the end of this `AudioData`."
        extra = len(another_audio_data.data) - (len(self.data) - self.endindex)
        self.pad_with_zeros(extra)
        self.data[self.endindex : self.endindex + len(another_audio_data)] += another_audio_data.data
        self.endindex += another_audio_data.endindex

    def sum(self, another_audio_data):
        extra = len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        compare_limit = min(len(another_audio_data.data), len(self.data)) - 1
        self.data[:compare_limit] += another_audio_data.data[:compare_limit]

    def add_at(self, time, another_audio_data):
        """
        Adds the input `another_audio_data` to this `AudioData`
        at the `time` specified in seconds.
        """
        offset = int(time * self.sampleRate)
        extra = offset + len(another_audio_data.data) - len(self.data)
        self.pad_with_zeros(extra)
        if another_audio_data.numChannels < self.numChannels:
            # duplicate a mono input across all channels of this AudioData
            another_audio_data.data = numpy.repeat(another_audio_data.data,
                                                   self.numChannels).reshape(len(another_audio_data), self.numChannels)
        self.data[offset : offset + len(another_audio_data.data)] += another_audio_data.data

    def __len__(self):
        if self.data is not None:
            return len(self.data)
        else:
            return 0

    def __add__(self, other):
        """Supports stuff like this: sound3 = sound1 + sound2"""
        return assemble([self, other], numChannels=self.numChannels,
                        sampleRate=self.sampleRate)

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by the `mp3` parameter; if `mp3` is false and
        `filename` ends with '.wav', a WAVE file is written.
        """
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            foo, tempfilename = tempfile.mkstemp(".wav")
            os.close(foo)
        else:
            tempfilename = filename
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i', 0))  # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if self.data.ndim == 1:
            noc = 1
        else:
            noc = self.data.shape[1]
        bits = self.data.dtype.itemsize * 8
        sbytes = self.sampleRate * (bits / 8) * noc
        ba = noc * (bits / 8)
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', self.data.nbytes))
        self.data.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size - 8))
        fid.close()
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = config.MP3_BITRATE
        except (NameError, AttributeError):
            bitRate = 128
        parsestring = ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        ffmpeg_error_check(parsestring[1])
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        return filename

    def unload(self):
        self.data = None
        if self.convertedfile:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % self.convertedfile
            os.remove(self.convertedfile)
            self.convertedfile = None

    def render(self, start=0.0, to_audio=None, with_source=None):
        if not to_audio:
            return self
        if with_source != self:
            return
        to_audio.add_at(start, self)
        return

    @property
    def duration(self):
        return float(self.endindex) / self.sampleRate

    @property
    def source(self):
        return self
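

# A minimal indexing sketch (not part of the original API): AudioData accepts
# float (seconds) and integer (sample) indices, float slices, and anything
# AudioQuantum-like with `start` and `duration`. The file path is an assumption.
def _example_audio_data_slicing(path='track.wav'):
    """Illustrative only: three ways to pull audio out of an AudioData."""
    ad = AudioData(filename=path)
    frame = ad[1.5]                   # the single frame 1.5 seconds in
    chunk = ad[1.5:2.5]               # a one-second AudioData slice
    quantum = AudioQuantum(start=1.5, duration=1.0)
    same_chunk = ad[quantum]          # the equivalent AudioQuantum slice
    return frame, chunk, same_chunk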


class AudioData32(AudioData):
    """A 32-bit variant of AudioData, intended for data collection on
    audio rendering with headroom."""
    def __init__(self, filename=None, ndarray=None, shape=None, sampleRate=None, numChannels=None, defer=False, verbose=True):
        """
        Special form of AudioData to allow for headroom when collecting samples.
        """
        self.verbose = verbose
        if (filename is not None) and (ndarray is None):
            if sampleRate is None or numChannels is None:
                # default to 44100 Hz, stereo, then probe the file with ffmpeg
                sampleRate, numChannels = 44100, 2
                parsestring = ffmpeg(filename, overwrite=False, verbose=self.verbose)
                ffmpeg_error_check(parsestring[1])
                sampleRate, numChannels = settings_from_ffmpeg(parsestring[1])
        self.defer = defer
        self.filename = filename
        self.sampleRate = sampleRate
        self.numChannels = numChannels
        self.convertedfile = None
        self.normalized = None
        # set endindex before any load(), so a loaded file's endindex
        # is not clobbered afterwards
        self.endindex = 0
        if shape is None and isinstance(ndarray, numpy.ndarray) and not self.defer:
            self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        elif shape is not None and not self.defer:
            self.data = numpy.zeros(shape, dtype=numpy.int32)
        elif not self.defer and self.filename:
            self.data = None
            self.load()
        else:
            self.data = None
        if ndarray is not None and self.data is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray

    def load(self):
        if isinstance(self.data, numpy.ndarray):
            return
        temp_file_handle = None
        if self.filename.lower().endswith(".wav") and (self.sampleRate, self.numChannels) == (44100, 2):
            file_to_read = self.filename
        elif self.convertedfile:
            file_to_read = self.convertedfile
        else:
            temp_file_handle, self.convertedfile = tempfile.mkstemp(".wav")
            result = ffmpeg(self.filename, self.convertedfile, overwrite=True,
                            numChannels=self.numChannels, sampleRate=self.sampleRate,
                            verbose=self.verbose)
            ffmpeg_error_check(result[1])
            file_to_read = self.convertedfile

        w = wave.open(file_to_read, 'r')
        numFrames = w.getnframes()
        raw = w.readframes(numFrames)
        sampleSize = numFrames * self.numChannels
        data = numpy.frombuffer(raw, dtype="<h", count=sampleSize)
        ndarray = numpy.array(data, dtype=numpy.int16)
        if self.numChannels > 1:
            ndarray.resize((numFrames, self.numChannels))
        self.data = numpy.zeros(ndarray.shape, dtype=numpy.int32)
        self.endindex = 0
        if ndarray is not None:
            self.endindex = len(ndarray)
            self.data[0:self.endindex] = ndarray
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        w.close()

    def encode(self, filename=None, mp3=None):
        """
        Outputs an MP3 or WAVE file to `filename`.
        Format is determined by the `mp3` parameter; if `mp3` is false and
        `filename` ends with '.wav', a WAVE file is written.
        """
        self.normalize()
        temp_file_handle = None
        if not mp3 and filename.lower().endswith('.wav'):
            mp3 = False
        else:
            mp3 = True
        if mp3:
            temp_file_handle, tempfilename = tempfile.mkstemp(".wav")
        else:
            tempfilename = filename
        fid = open(tempfilename, 'wb')
        # Based on Scipy svn
        # http://projects.scipy.org/pipermail/scipy-svn/2007-August/001189.html
        fid.write('RIFF')
        fid.write(struct.pack('<i', 0))  # write a 0 for length now, we'll go back and add it later
        fid.write('WAVE')
        # fmt chunk
        fid.write('fmt ')
        if self.normalized.ndim == 1:
            noc = 1
        else:
            noc = self.normalized.shape[1]
        bits = self.normalized.dtype.itemsize * 8
        sbytes = self.sampleRate * (bits / 8) * noc
        ba = noc * (bits / 8)
        fid.write(struct.pack('<ihHiiHH', 16, 1, noc, self.sampleRate, sbytes, ba, bits))
        # data chunk
        fid.write('data')
        fid.write(struct.pack('<i', self.normalized.nbytes))
        self.normalized.tofile(fid)
        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<i', size - 8))
        fid.close()
        self.normalized = None
        if not mp3:
            return tempfilename
        # now convert it to mp3
        if not filename.lower().endswith('.mp3'):
            filename = filename + '.mp3'
        try:
            bitRate = config.MP3_BITRATE
        except (NameError, AttributeError):
            bitRate = 128
        parsestring = ffmpeg(tempfilename, filename, bitRate=bitRate, verbose=self.verbose)
        ffmpeg_error_check(parsestring[1])
        if tempfilename != filename:
            if self.verbose:
                print >> sys.stderr, "Deleting: %s" % tempfilename
            os.remove(tempfilename)
        if temp_file_handle is not None:
            os.close(temp_file_handle)
        return filename

    def normalize(self):
        """Return to 16-bit for encoding."""
        if self.numChannels == 1:
            self.normalized = numpy.zeros((self.data.shape[0],), dtype=numpy.int16)
        else:
            self.normalized = numpy.zeros((self.data.shape[0], self.data.shape[1]),
                                          dtype=numpy.int16)
        factor = 32767.0 / numpy.max(numpy.absolute(self.data.flatten()))
        # If the max was 32768, don't bother scaling:
        if factor < 1.000031:
            self.normalized[:len(self.data)] += self.data * factor
        else:
            self.normalized[:len(self.data)] += self.data

    def pad_with_zeros(self, num_samples):
        if num_samples > 0:
            if self.numChannels == 1:
                extra_shape = (num_samples,)
            else:
                extra_shape = (num_samples, self.numChannels)
            self.data = numpy.append(self.data,
                                     numpy.zeros(extra_shape, dtype=numpy.int32), axis=0)
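

# A minimal headroom sketch (not part of the original API): summing 16-bit
# sources into an int32 buffer avoids wraparound, and encode() calls
# normalize() to rescale back to 16-bit. The arguments are assumptions.
def _example_headroom_mixdown(sources, seconds=10.0, out='mix.mp3'):
    """Illustrative only: `sources` is a list of stereo 44100-Hz AudioData."""
    canvas = AudioData32(shape=(int(seconds * 44100), 2),
                         sampleRate=44100, numChannels=2, defer=False)
    for src in sources:
        canvas.add_at(0.0, src)       # int32 accumulation, no int16 overflow
    return canvas.encode(out)         # normalize() rescales peaks to 16 bits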


def get_os():
    """returns is_linux, is_mac, is_windows"""
    if hasattr(os, 'uname'):
        if os.uname()[0] == "Darwin":
            return False, True, False
        return True, False, False
    return False, False, True


def ffmpeg(infile, outfile=None, overwrite=True, bitRate=None, numChannels=None, sampleRate=None, verbose=True):
    """
    Executes ffmpeg through the shell to convert or read media files.
    """
    command = "en-ffmpeg"
    if overwrite:
        command += " -y"
    command += " -i \"" + infile + "\""
    if bitRate is not None:
        command += " -ab " + str(bitRate) + "k"
    if numChannels is not None:
        command += " -ac " + str(numChannels)
    if sampleRate is not None:
        command += " -ar " + str(sampleRate)
    if outfile is not None:
        command += " \"%s\"" % outfile
    if verbose:
        print >> sys.stderr, command
    (lin, mac, win) = get_os()
    if not win:
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    else:
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=False)
    return_val = p.communicate()
    return return_val
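

# A minimal sketch of how this helper is used elsewhere in the module:
# ffmpeg() returns the (stdout, stderr) pair from communicate(), and the
# metadata that matters is on stderr. The file path is an assumption.
def _example_probe_with_ffmpeg(path='track.mp3'):
    """Illustrative only: probe a file without writing any output."""
    out, err = ffmpeg(path, overwrite=False, verbose=False)
    ffmpeg_error_check(err)           # raise on known ffmpeg failure modes
    return settings_from_ffmpeg(err)  # -> (sampleRate, numChannels)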


def settings_from_ffmpeg(parsestring):
    """
    Parses the output of ffmpeg to determine the sample rate and number of
    channels of an audio file.
    """
    parse = parsestring.split('\n')
    freq, chans = 44100, 2
    for line in parse:
        if "Stream #0" in line and "Audio" in line:
            segs = line.split(", ")
            for s in segs:
                if "Hz" in s:
                    #print "Found: "+str(s.split(" ")[0])+"Hz"
                    freq = int(s.split(" ")[0])
                elif "stereo" in s:
                    #print "stereo"
                    chans = 2
                elif "2 channels" in s:
                    #print "stereo"
                    chans = 2
                elif "mono" in s:
                    #print "mono"
                    chans = 1
                elif "1 channels" in s:
                    #print "mono"
                    chans = 1
    return freq, chans


ffmpeg_install_instructions = """
en-ffmpeg not found! Please make sure ffmpeg is installed and create a link as follows:
    sudo ln -s `which ffmpeg` /usr/local/bin/en-ffmpeg
"""


def ffmpeg_error_check(parsestring):
    "Looks for known errors in the ffmpeg output"
    parse = parsestring.split('\n')
    error_cases = ["Unknown format",   # ffmpeg can't figure out format of input file
                   "error occur",      # an error occurred
                   "Could not open",   # user doesn't have permission to access file
                   "not found",        # could not find encoder for output file
                   ]
    for num, line in enumerate(parse):
        if "command not found" in line:
            raise RuntimeError(ffmpeg_install_instructions)
        for error in error_cases:
            if error in line:
                report = "\n\t".join(parse[num:])
                raise RuntimeError("ffmpeg conversion error:\n\t" + report)


def getpieces(audioData, segs):
    """
    Collects audio samples for output.
    Returns a new `AudioData` where the new sample data is assembled
    from the input audioData according to the time offsets in each
    of the elements of the input segs (commonly an `AudioQuantumList`).

    :param audioData: an `AudioData` object
    :param segs: an iterable containing objects that may be accessed
                 as slices or indices for an `AudioData`
    """
    #calculate length of new segment
    audioData.data = None
    audioData.load()
    dur = 0
    for s in segs:
        dur += int(s.duration * audioData.sampleRate)
    # if I wanted to add some padding to the length, I'd do it here

    #determine shape of new array
    if len(audioData.data.shape) > 1:
        newshape = (dur, audioData.data.shape[1])
        newchans = audioData.data.shape[1]
    else:
        newshape = (dur,)
        newchans = 1

    #make accumulator segment
    newAD = AudioData(shape=newshape, sampleRate=audioData.sampleRate,
                      numChannels=newchans, defer=False, verbose=audioData.verbose)

    #concatenate segs to the new segment
    for s in segs:
        newAD.append(audioData[s])
    # audioData.unload()
    return newAD
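

# A minimal remix sketch for getpieces() (not part of the original API):
# keep only the even-numbered beats of a track. The file path is an
# assumption, and an Echo Nest API key is assumed for the analysis step.
def _example_every_other_beat(path='track.mp3', out='evens.mp3'):
    """Illustrative only: getpieces() concatenates the selected quanta."""
    audiofile = LocalAudioFile(path)
    beats = audiofile.analysis.beats
    evens = [beat for i, beat in enumerate(beats) if i % 2 == 0]
    getpieces(audiofile, evens).encode(out)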


def assemble(audioDataList, numChannels=1, sampleRate=44100, verbose=True):
    """
    Collects audio samples for output.
    Returns a new `AudioData` object assembled
    by concatenating all the elements of audioDataList.

    :param audioDataList: a list of `AudioData` objects
    """
    if numChannels == 1:
        new_shape = (sum([len(x.data) for x in audioDataList]),)
    else:
        new_shape = (sum([len(x.data) for x in audioDataList]), numChannels)
    new_data = AudioData(shape=new_shape, numChannels=numChannels,
                         sampleRate=sampleRate, defer=False, verbose=verbose)
    for ad in audioDataList:
        if not isinstance(ad, AudioData):
            raise TypeError('Encountered something other than an AudioData')
        new_data.append(ad)
    return new_data


def mix(dataA, dataB, mix=0.5):
    """
    Mixes two `AudioData` objects. Assumes they have the same sample rate
    and number of channels.

    The `mix` parameter is a float between 0 and 1 that determines the
    relative mix of the two inputs; e.g., mix=0.9 yields a greater presence
    of dataA in the final mix.
    """
    if dataA.endindex > dataB.endindex:
        newdata = AudioData(ndarray=dataA.data, sampleRate=dataA.sampleRate,
                            numChannels=dataA.numChannels, defer=False)
        newdata.data = newdata.data * float(mix)
        newdata.data[:dataB.endindex] += dataB.data[:] * (1 - float(mix))
    else:
        newdata = AudioData(ndarray=dataB.data, sampleRate=dataB.sampleRate,
                            numChannels=dataB.numChannels, defer=False)
        newdata.data = newdata.data * (1.0 - float(mix))
        newdata.data[:dataA.endindex] += dataA.data[:] * float(mix)
    return newdata
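

# A minimal sketch for mix() (not part of the original API): lay a quiet
# copy of one track under another. Both paths are assumptions; both inputs
# decode to the default 44100-Hz stereo, so the format-matching assumption
# that mix() documents is satisfied.
def _example_mix_two_tracks(path_a='a.mp3', path_b='b.mp3', out='mixed.mp3'):
    """Illustrative only: 70% of track A, 30% of track B."""
    a = AudioData(filename=path_a)
    b = AudioData(filename=path_b)
    mix(a, b, mix=0.7).encode(out)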


def megamix(dataList):
    """
    Mix together any number of `AudioData` objects. Keep the shape of
    the first one in the list. Assume they all have the same sample rate
    and number of channels.
    """
    if not isinstance(dataList, list):
        raise TypeError('input must be a list of AudioData objects')
    newdata = AudioData(shape=dataList[0].data.shape, sampleRate=dataList[0].sampleRate,
                        numChannels=dataList[0].numChannels, defer=False)
    for adata in dataList:
        if not isinstance(adata, AudioData):
            raise TypeError('input must be a list of AudioData objects')
        if len(adata) > len(newdata):
            newseg = AudioData(ndarray=adata[:newdata.endindex].data,
                               numChannels=newdata.numChannels,
                               sampleRate=newdata.sampleRate, defer=False)
            newseg.endindex = newdata.endindex
        else:
            newseg = AudioData(ndarray=adata.data,
                               numChannels=newdata.numChannels,
                               sampleRate=newdata.sampleRate, defer=False)
            newseg.endindex = adata.endindex
        newdata.data[:newseg.endindex] += newseg.data / float(len(dataList))
    newdata.endindex = len(newdata)
    return newdata


class LocalAudioFile(AudioData):
    """
    The basic do-everything class for remixing. Acts as an `AudioData`
    object, but with an added `analysis` selector which is an
    `AudioAnalysis` object. It conditionally uploads the file
    it was initialized with. If the file is already known to the
    Analyze API, then it does not bother uploading the file.
    """
    def __init__(self, filename, verbose=True, defer=False):
        """
        :param filename: path to a local MP3 file
        """
        AudioData.__init__(self, filename=filename, verbose=verbose, defer=defer)
        track_md5 = hashlib.md5(file(filename, 'rb').read()).hexdigest()
        if verbose:
            print >> sys.stderr, "Computed MD5 of file is " + track_md5
        try:
            if verbose:
                print >> sys.stderr, "Probing for existing analysis"
            tempanalysis = AudioAnalysis(track_md5)
        except Exception, e:
            if verbose:
                print >> sys.stderr, "Analysis not found. Uploading..."
            tempanalysis = AudioAnalysis(filename)
        self.analysis = tempanalysis
        self.analysis.source = self

    def toxml(self, context=None):
        raise NotImplementedError

    @property
    def duration(self):
        """
        Since we consider `AudioFile` to be an evolved version of
        `AudioData`, we return the measured duration from the analysis.
        """
        return self.analysis.duration

    def __setstate__(self, state):
        """
        Recreates circular reference after unpickling.
        """
        self.__dict__.update(state)
        self.analysis.source = weakref.proxy(self)
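

# The canonical workflow, as a sketch (the file path is an assumption): load
# a local file, let LocalAudioFile find or create its analysis, then render
# some of the resulting quanta back into audio.
def _example_render_bars(path='track.mp3', out='bars.mp3'):
    """Illustrative only: analysis quanta render back into audio."""
    audiofile = LocalAudioFile(path)   # decode + fetch or upload analysis
    bars = audiofile.analysis.bars     # an AudioQuantumList
    bars.encode(out)                   # render() + AudioData.encode()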


class LocalAnalysis(object):
    """
    Like `LocalAudioFile`, it conditionally uploads the file with which
    it was initialized. Unlike `LocalAudioFile`, it is not a subclass of
    `AudioData`, so contains no sample data.
    """
    def __init__(self, filename, verbose=True):
        """
        :param filename: path to a local MP3 file
        """
        track_md5 = hashlib.md5(file(filename, 'rb').read()).hexdigest()
        if verbose:
            print >> sys.stderr, "Computed MD5 of file is " + track_md5
        try:
            if verbose:
                print >> sys.stderr, "Probing for existing analysis"
            tempanalysis = AudioAnalysis(track_md5)
        except Exception, e:
            print e
            if verbose:
                print >> sys.stderr, "Analysis not found. Uploading..."
            tempanalysis = AudioAnalysis(filename)
        self.analysis = tempanalysis
        self.analysis.source = self


class AudioQuantum(AudioRenderable):
    """
    A unit of musical time, identified at minimum with a start time and
    a duration, both in seconds. It most often corresponds with a `section`,
    `bar`, `beat`, `tatum`, or (by inheritance) `segment` obtained from an Analyze
    API call.

    Additional properties include:

    end
        computed time offset for convenience: `start` + `duration`
    container
        a circular reference to the containing `AudioQuantumList`,
        created upon creation of the `AudioQuantumList` that covers
        the whole track
    """
    def __init__(self, start=0, duration=0, kind=None, confidence=None, source=None):
        """
        Initializes an `AudioQuantum`.

        :param start: offset from the start of the track, in seconds
        :param duration: length of the `AudioQuantum`
        :param kind: string containing what kind of rhythm unit it came from
        :param confidence: float between zero and one
        """
        self.start = start
        self.duration = duration
        self.kind = kind
        self.confidence = confidence
        self._source = source

    def get_end(self):
        return self.start + self.duration

    end = property(get_end, doc="""
    A computed property: the sum of `start` and `duration`.
    """)

    def get_source(self):
        "Returns itself or its parent."
        if self._source:
            return self._source
        else:
            source = None
            try:
                source = self.container.source
            except AttributeError:
                source = None
            return source

    def set_source(self, value):
        if isinstance(value, AudioData):
            self._source = value
        else:
            raise TypeError("Source must be an instance of echonest.audio.AudioData")

    source = property(get_source, set_source, doc="""
    The `AudioData` source for the AudioQuantum.
    """)

    def parent(self):
        """
        Returns the containing `AudioQuantum` in the rhythm hierarchy:
        a `tatum` returns a `beat`, a `beat` returns a `bar`, and a `bar` returns a
        `section`.
        """
        pars = {'tatum': 'beats',
                'beat': 'bars',
                'bar': 'sections'}
        try:
            uppers = getattr(self.container.container, pars[self.kind])
            return uppers.that(selection.overlap(self))[0]
        except LookupError:
            # Might not be in pars, might not have anything in parent.
            return None

    def children(self):
        """
        Returns an `AudioQuantumList` of the AudioQuanta that it contains,
        one step down the hierarchy. A `beat` returns `tatums`, a `bar` returns
        `beats`, and a `section` returns `bars`.
        """
        chils = {'beat': 'tatums',
                 'bar': 'beats',
                 'section': 'bars'}
        try:
            downers = getattr(self.container.container, chils[self.kind])
            return downers.that(selection.are_contained_by(self))
        except LookupError:
            return None

    def group(self):
        """
        Returns the `children`\() of the `AudioQuantum`\'s `parent`\().
        In other words: 'siblings'. If no parent is found, then return the
        `AudioQuantumList` for the whole track.
        """
        if self.parent():
            return self.parent().children()
        else:
            return self.container

    def prev(self, step=1):
        """
        Step backwards in the containing `AudioQuantumList`.
        Returns `self` if a boundary is reached.
        """
        group = self.container
        try:
            loc = group.index(self)
            new = max(loc - step, 0)
            return group[new]
        except Exception:
            return self

    def next(self, step=1):
        """
        Step forward in the containing `AudioQuantumList`.
        Returns `self` if a boundary is reached.
        """
        group = self.container
        try:
            loc = group.index(self)
            new = min(loc + step, len(group))
            return group[new]
        except Exception:
            return self

    def __str__(self):
        """
        Lists the `AudioQuantum`.kind with start and
        end times, in seconds, e.g.::

            "segment (20.31 - 20.42)"
        """
        return "%s (%.2f - %.2f)" % (self.kind, self.start, self.end)

    def __repr__(self):
        """
        A string representing a constructor, including kind, start time,
        duration, and (if it exists) confidence, e.g.::

            "AudioQuantum(kind='tatum', start=42.198267, duration=0.1523394)"
        """
        if self.confidence is not None:
            return "AudioQuantum(kind='%s', start=%f, duration=%f, confidence=%f)" % (self.kind, self.start, self.duration, self.confidence)
        else:
            return "AudioQuantum(kind='%s', start=%f, duration=%f)" % (self.kind, self.start, self.duration)

    def local_context(self):
        """
        Returns a tuple of (*index*, *length*) within rhythm siblings, where
        *index* is the (zero-indexed) position within its `group`\(), and
        *length* is the number of siblings within its `group`\().
        """
        group = self.group()
        count = len(group)
        try:
            loc = group.index(self)
        except Exception:  # seem to be some uncontained beats
            loc = 0
        return (loc, count,)

    def absolute_context(self):
        """
        Returns a tuple of (*index*, *length*) within the containing
        `AudioQuantumList`, where *index* is the (zero-indexed) position within
        its container, and *length* is the number of siblings within the
        container.
        """
        group = self.container
        count = len(group)
        loc = group.index(self)
        return (loc, count,)

    def context_string(self):
        """
        Returns a one-indexed, human-readable version of context.
        For example::

            "bar 4 of 142, beat 3 of 4, tatum 2 of 3"
        """
        if self.parent() and self.kind != "bar":
            return "%s, %s %i of %i" % (self.parent().context_string(),
                                        self.kind, self.local_context()[0] + 1,
                                        self.local_context()[1])
        else:
            return "%s %i of %i" % (self.kind, self.absolute_context()[0] + 1,
                                    self.absolute_context()[1])

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        if 'container' in dictclone:
            del dictclone['container']
        return dictclone

    def toxml(self, context=None):
        attributedict = {'duration': str(self.duration),
                         'start': str(self.start)}
        try:
            if not(hasattr(context, 'source') and self.source == context.source):
                attributedict['source'] = self.source.analysis.identifier
        except Exception:
            pass
        xml = etree.Element(self.kind, attrib=attributedict)
        if context:
            return xml
        else:
            # minidom needs a string, so serialize the Element first
            return minidom.parseString(etree.tostring(xml)).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        if not to_audio:
            source = self.resolve_source(with_source)
            return source[self]
        if with_source != self.source:
            return
        to_audio.add_at(start, with_source[self])
        return
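

# A minimal navigation sketch (not part of the original API) for the rhythm
# hierarchy described above: tatum -> beat -> bar -> section via parent()
# and children(), with next()/prev() stepping among siblings. The file path
# is an assumption.
def _example_walk_hierarchy(path='track.mp3'):
    """Illustrative only: print where the first beat sits in the track."""
    audiofile = LocalAudioFile(path)
    beat = audiofile.analysis.beats[0]
    print beat.context_string()        # e.g. "bar 1 of 142, beat 1 of 4"
    print beat.parent()                # the containing bar
    print beat.children()              # the tatums inside this beat
    print beat.next()                  # the following beat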


class AudioSegment(AudioQuantum):
    """
    Subclass of `AudioQuantum` for the data-rich segments returned by
    the Analyze API.
    """
    def __init__(self, start=0., duration=0., pitches=None, timbre=None,
                 loudness_begin=0., loudness_max=0., time_loudness_max=0.,
                 loudness_end=None, kind='segment', source=None):
        """
        Initializes an `AudioSegment`.

        :param start: offset from start of the track, in seconds
        :param duration: duration of the `AudioSegment`, in seconds
        :param pitches: a twelve-element list with relative loudnesses of each
                        pitch class, from C (pitches[0]) to B (pitches[11])
        :param timbre: a twelve-element list with the loudness of each of a
                       principal component of time and/or frequency profile
        :param kind: string identifying the kind of AudioQuantum: "segment"
        :param loudness_begin: loudness in dB at the start of the segment
        :param loudness_max: loudness in dB at the loudest moment of the
                             segment
        :param time_loudness_max: time (in sec from start of segment) of
                                  loudest moment
        :param loudness_end: loudness at end of segment (if it is given)
        """
        self.start = start
        self.duration = duration
        self.pitches = pitches or []
        self.timbre = timbre or []
        self.loudness_begin = loudness_begin
        self.loudness_max = loudness_max
        self.time_loudness_max = time_loudness_max
        if loudness_end:
            self.loudness_end = loudness_end
        self.kind = kind
        self.confidence = None
        self._source = source


class ModifiedRenderable(AudioRenderable):
    """Class that contains any AudioRenderable, but overrides the
    render() method with nested effects, called sequentially on the
    result of the preceding effect."""
    def __init__(self, original, effects=[]):
        if isinstance(original, ModifiedRenderable):
            self._original = original._original
            self._effects = original._effects + effects
        else:
            self._original = original
            self._effects = effects

    @property
    def duration(self):
        dur = self._original.duration
        for effect in self._effects:
            if hasattr(effect, 'duration'):
                dur = effect.duration(dur)
        return dur

    @property
    def source(self):
        return self._original.source

    @property
    def sources(self):
        return self._original.sources

    def render(self, start=0.0, to_audio=None, with_source=None):
        if not to_audio:
            source = self.resolve_source(with_source)
            base = self._original.render(with_source=with_source)
            copy = AudioData32(ndarray=base.data, sampleRate=base.sampleRate,
                               numChannels=base.numChannels, defer=False)
            for effect in self._effects:
                copy = effect.modify(copy)
            return copy
        if with_source != self.source:
            return
        base = self._original.render(with_source=with_source)
        copy = AudioData32(ndarray=base.data, shape=base.data.shape,
                           sampleRate=base.sampleRate,
                           numChannels=base.numChannels, defer=False)
        for effect in self._effects:
            copy = effect.modify(copy)
        to_audio.add_at(start, copy)
        return

    def toxml(self, context=None):
        outerattributedict = {'duration': str(self.duration)}
        node = etree.Element("modified_audioquantum", attrib=outerattributedict)
        innerattributedict = {'duration': str(self._original.duration),
                              'start': str(self._original.start)}
        try:
            if not(hasattr(context, 'source') and self.source == context.source):
                innerattributedict['source'] = self.source.analysis.identifier
        except Exception:
            pass
        orignode = etree.Element(self._original.kind, attrib=innerattributedict)
        node.append(orignode)
        fx = etree.Element('effects')
        for effect in self._effects:
            fxdict = {'id': '%s.%s' % (effect.__module__, effect.__class__.__name__)}
            fxdict.update(effect.__dict__)
            fx.append(etree.Element('effect', attrib=fxdict))
        node.append(fx)
        if context:
            return node
        else:
            # minidom needs a string, so serialize the Element first
            return minidom.parseString(etree.tostring(node)).toprettyxml()


class AudioEffect(object):
    def __call__(self, aq):
        return ModifiedRenderable(aq, [self])


class LevelDB(AudioEffect):
    def __init__(self, change):
        self.change = change

    def modify(self, adata):
        adata.data *= pow(10., self.change / 20.)
        return adata


class AmplitudeFactor(AudioEffect):
    def __init__(self, change):
        self.change = change

    def modify(self, adata):
        adata.data *= self.change
        return adata


class TimeTruncateFactor(AudioEffect):
    def __init__(self, factor):
        self.factor = factor

    def duration(self, old_duration):
        return old_duration * self.factor

    def modify(self, adata):
        endindex = int(self.factor * len(adata))
        if self.factor > 1:
            adata.pad_with_zeros(endindex - len(adata))
        adata.endindex = endindex
        return adata[:endindex]


class TimeTruncateLength(AudioEffect):
    def __init__(self, new_duration):
        self.new_duration = new_duration

    def duration(self, old_duration):
        return self.new_duration

    def modify(self, adata):
        endindex = int(self.new_duration * adata.sampleRate)
        if self.new_duration > adata.duration:
            adata.pad_with_zeros(endindex - len(adata))
        adata.endindex = endindex
        return adata[:endindex]
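

# A minimal effects sketch (not part of the original API): AudioEffect
# instances are callable, wrapping a renderable in a ModifiedRenderable
# whose render() applies each effect in turn. The file path is an assumption.
def _example_quiet_first_bar(path='track.mp3', out='quiet_bar.mp3'):
    """Illustrative only: drop the first bar's level by 6 dB and encode."""
    audiofile = LocalAudioFile(path)
    bar = audiofile.analysis.bars[0]
    quieter = LevelDB(-6.)(bar)        # returns a ModifiedRenderable
    quieter.encode(out)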


class AudioQuantumList(list, AudioRenderable):
    """
    A container that enables content-based selection and filtering.
    A `List` that contains `AudioQuantum` objects, with additional methods
    for manipulating them.

    When an `AudioQuantumList` is created for a track via a call to the
    Analyze API, `attach`\() is called so that its container is set to the
    containing `AudioAnalysis`, and the container of each of the
    `AudioQuantum` list members is set to itself.

    Additional accessors now include AudioQuantum elements such as
    `start`, `duration`, and `confidence`, which each return a List of the
    corresponding properties in the contained AudioQuanta. A special name
    is `kinds`, which returns a List of the `kind` of each `AudioQuantum`.
    If `AudioQuantumList.kind` is "`segment`", then `pitches`, `timbre`,
    `loudness_begin`, `loudness_max`, `time_loudness_max`, and `loudness_end`
    are available.
    """
    def __init__(self, initial=None, kind=None, container=None, source=None):
        """
        Initializes an `AudioQuantumList`. All parameters are optional.

        :param initial: a `List` type with the initial contents
        :param kind: a label for the kind of `AudioQuantum` contained
                     within
        :param container: a reference to the containing `AudioAnalysis`
        :param source: a reference to the `AudioData` with the corresponding samples
                       and time base for the contained AudioQuanta
        """
        list.__init__(self)
        self.kind = None
        self._source = None
        if isinstance(initial, AudioQuantumList):
            self.kind = initial.kind
            self.container = initial.container
            self._source = initial.source
        if kind:
            self.kind = kind
        if container:
            self.container = container
        if source:
            self._source = source
        if initial:
            self.extend(initial)

    def get_many(attribute):
        def fun(self):
            """
            Returns a list of %s for each `AudioQuantum`.
            """ % attribute
            return [getattr(x, attribute) for x in list.__iter__(self)]
        return fun

    def get_many_if_segment(attribute):
        def fun(self):
            """
            Returns a list of %s for each `Segment`.
            """ % attribute
            if self.kind == 'segment':
                return [getattr(x, attribute) for x in list.__iter__(self)]
            else:
                raise AttributeError("<%s> only accessible for segments" % (attribute,))
        return fun

    def get_duration(self):
        return sum(self.durations)
        #return sum([x.duration for x in self])

    def get_source(self):
        "Returns its own or its parent's source."
        if len(self) < 1:
            return
        if self._source:
            return self._source
        else:
            try:
                source = self.container.source
            except AttributeError:
                source = self[0].source
            return source

    def set_source(self, value):
        "Checks input to see if it is an `AudioData`."
        if isinstance(value, AudioData):
            self._source = value
        else:
            raise TypeError("Source must be an instance of echonest.audio.AudioData")

    durations = property(get_many('duration'))
    kinds = property(get_many('kind'))
    start = property(get_many('start'))
    confidence = property(get_many('confidence'))

    pitches = property(get_many_if_segment('pitches'))
    timbre = property(get_many_if_segment('timbre'))
    loudness_begin = property(get_many_if_segment('loudness_begin'))
    loudness_max = property(get_many_if_segment('loudness_max'))
    time_loudness_max = property(get_many_if_segment('time_loudness_max'))
    loudness_end = property(get_many_if_segment('loudness_end'))

    source = property(get_source, set_source, doc="""
    The `AudioData` source for the `AudioQuantumList`.
    """)

    duration = property(get_duration, doc="""
    Total duration of the `AudioQuantumList`.
    """)

    def sources(self):
        ss = set()
        for aq in list.__iter__(self):
            ss.update(aq.sources())
        return ss

    def that(self, filt):
        """
        Method for applying a function to each of the contained
        `AudioQuantum` objects. Returns a new `AudioQuantumList`
        of the same `kind` containing the `AudioQuantum` objects
        for which the input function is true.

        See `echonest.selection` for example selection filters.

        :param filt: a function that takes one `AudioQuantum` and returns
                     a `True` value or `None`

        :change: experimenting with a filter-only form
        """
        out = AudioQuantumList(kind=self.kind)
        out.extend(filter(filt, self))
        return out

    def ordered_by(self, function, descending=False):
        """
        Returns a new `AudioQuantumList` of the same `kind` with the
        original elements, but ordered from low to high according to
        the input function acting as a key.

        See `echonest.sorting` for example ordering functions.

        :param function: a function that takes one `AudioQuantum` and returns
                         a comparison key
        :param descending: when `True`, reverses the sort order, from
                           high to low
        """
        out = AudioQuantumList(kind=self.kind)
        out.extend(sorted(self, key=function, reverse=descending))
        return out

    def beget(self, source, which=None):
        """
        There are two basic forms: a map-and-flatten and a converse-`that`\().

        The basic form, with one `function` argument, returns a new
        `AudioQuantumList` so that the source function returns
        `None`, one, or many AudioQuanta for each `AudioQuantum` contained within
        `self`, and flattens them, in order. ::

            beats.beget(the_next_ones)

        A second form has the first argument `source` as an `AudioQuantumList`, and
        a second argument, `which`, is used as a filter for the first argument, for
        *each* of `self`. The results are collapsed and accordioned into a flat
        list.

        For example, calling::

            beats.beget(segments, which=overlap)

        Gets evaluated as::

            for beat in beats:
                return segments.that(overlap(beat))

        And all of the `AudioQuantumList`\s that return are flattened into
        a single `AudioQuantumList`.

        :param source: A function of one argument that is applied to each
                       `AudioQuantum` of `self`, or an `AudioQuantumList`, in which case
                       the second argument is required.
        :param which: A function of one argument that acts as a `that`\() filter
                      on the first argument if it is an `AudioQuantumList`, or as a filter
                      on the output, in the case of `source` being a function.
        """
        out = AudioQuantumList()
        if isinstance(source, AudioQuantumList):
            if not which:
                raise TypeError("'beget' requires a second argument, 'which'")
            out.extend(chain_from_mixed([source.that(which(x)) for x in self]))
        else:
            out.extend(chain_from_mixed(map(source, self)))
            if which:
                out = out.that(which)
        return out

    def attach(self, container):
        """
        Create circular references to the containing `AudioAnalysis` and for the
        contained `AudioQuantum` objects.
        """
        self.container = container
        for i in self:
            i.container = self

    def __getstate__(self):
        """
        Eliminates the circular reference for pickling.
        """
        dictclone = self.__dict__.copy()
        if 'container' in dictclone:
            del dictclone['container']
        return dictclone

    def toxml(self, context=None):
        xml = etree.Element("sequence")
        xml.attrib['duration'] = str(self.duration)
        if not context:
            xml.attrib['source'] = self.source.analysis.identifier
            for s in self.sources():
                xml.append(s.toxml())
        elif self._source:
            try:
                if self.source != context.source:
                    xml.attrib['source'] = self.source.analysis.identifier
            except Exception:
                pass
        for x in list.__iter__(self):
            xml.append(x.toxml(context=self))
        if context:
            return xml
        else:
            # minidom needs a string, so serialize the Element first
            return minidom.parseString(etree.tostring(xml)).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        if len(self) < 1:
            return
        if not to_audio:
            dur = 0
            tempsource = self.source or list.__getitem__(self, 0).source
            for aq in list.__iter__(self):
                dur += int(aq.duration * tempsource.sampleRate)
            to_audio = self.init_audio_data(tempsource, dur)
        if not hasattr(with_source, 'data'):
            for tsource in self.sources():
                this_start = start
                for aq in list.__iter__(self):
                    aq.render(start=this_start, to_audio=to_audio, with_source=tsource)
                    this_start += aq.duration
                if tsource.defer:
                    tsource.unload()
            return to_audio
        else:
            if with_source not in self.sources():
                return
            for aq in list.__iter__(self):
                aq.render(start=start, to_audio=to_audio, with_source=with_source)
                start += aq.duration
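

# A minimal selection-and-ordering sketch (not part of the original API):
# that() filters and ordered_by() sorts, each returning a new
# AudioQuantumList, so the calls compose. The inline lambdas stand in for
# the helpers in echonest.selection and echonest.sorting; the file path is
# an assumption.
def _example_confident_beats(path='track.mp3', out='confident.mp3'):
    """Illustrative only: keep confident beats, longest first."""
    audiofile = LocalAudioFile(path)
    beats = audiofile.analysis.beats
    confident = beats.that(lambda b: b.confidence > 0.5)
    picked = confident.ordered_by(lambda b: b.duration, descending=True)
    getpieces(audiofile, picked).encode(out)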


class Simultaneous(AudioQuantumList):
    """
    Stacks all contained AudioQuanta atop one another, adding their respective
    samples. The rhythmic length of the segment is the duration of the first
    `AudioQuantum`, but there can be significant overlap caused by the longest
    segment.

    Sample usage::

        Simultaneous(a.analysis.bars).encode("my.mp3")
    """
    def __init__(self, *args, **kwargs):
        AudioQuantumList.__init__(self, *args, **kwargs)

    def get_duration(self):
        try:
            return self[0].duration
        except Exception:
            return 0.

    duration = property(get_duration, doc="""
    Rhythmic duration of the `Simultaneous` AudioQuanta: the
    same as the duration of the first in the list.
    """)

    def toxml(self, context=None):
        xml = etree.Element("parallel")
        xml.attrib['duration'] = str(self.duration)
        if not context:
            xml.attrib['source'] = self.source.analysis.identifier
        elif self.source != context.source:
            try:
                xml.attrib['source'] = self.source.analysis.identifier
            except Exception:
                pass
        for x in list.__iter__(self):
            xml.append(x.toxml(context=self))
        if context:
            return xml
        else:
            # minidom needs a string, so serialize the Element first
            return minidom.parseString(etree.tostring(xml)).toprettyxml()

    def render(self, start=0.0, to_audio=None, with_source=None):
        if not to_audio:
            tempsource = self.source or list.__getitem__(self, 0).source
            dur = int(max(self.durations) * tempsource.sampleRate)
            to_audio = self.init_audio_data(tempsource, dur)
        if not hasattr(with_source, 'data'):
            for source in self.sources():
                for aq in list.__iter__(self):
                    aq.render(start=start, to_audio=to_audio, with_source=source)
                if source.defer:
                    source.unload()
            return to_audio
        else:
            if with_source not in self.sources():
                return
            else:
                for aq in list.__iter__(self):
                    aq.render(start=start, to_audio=to_audio, with_source=with_source)


def _dataParser(tag, nodes):
    out = AudioQuantumList(kind=tag)
    for n in nodes:
        out.append(AudioQuantum(start=n['start'], kind=tag, confidence=n['confidence']))
    if len(out) > 1:
        # the API gives only start times for these units, so each duration
        # runs to the next start; the last one borrows its neighbor's duration
        for i in range(len(out) - 1):
            out[i].duration = out[i + 1].start - out[i].start
        out[-1].duration = out[-2].duration
    return out


def _attributeParser(tag, nodes):
    out = AudioQuantumList(kind=tag)
    for n in nodes:
        out.append(AudioQuantum(n['start'], n['duration'], tag))
    return out


def _segmentsParser(nodes):
    out = AudioQuantumList(kind='segment')
    for n in nodes:
        out.append(AudioSegment(start=n['start'], duration=n['duration'],
                                pitches=n['pitches'], timbre=n['timbre'],
                                loudness_begin=n['loudness_start'],
                                loudness_max=n['loudness_max'],
                                time_loudness_max=n['loudness_max_time'],
                                loudness_end=n.get('loudness_end')))
    return out


def chain_from_mixed(iterables):
    """
    Helper function to flatten a list of elements and lists
    into a list of elements.
    """
    for y in iterables:
        try:
            iter(y)
            for element in y:
                yield element
        except Exception:
            yield y
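

# A minimal flattening sketch for chain_from_mixed(), which beget() uses to
# merge per-quantum results: plain values pass through, iterables are
# expanded one level.
def _example_chain_from_mixed():
    """Illustrative only: returns [1, 2, 3, 4]."""
    mixed = [1, [2, 3], 4]
    return list(chain_from_mixed(mixed))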


class FileTypeError(Exception):
    def __init__(self, filename, message):
        self.filename = filename
        self.message = message

    def __str__(self):
        return self.message + ': ' + self.filename


class EchoNestRemixError(Exception):
    """
    Error raised by the Remix API.
    """
    pass