
/external/youtube-dl/youtube-dl

http://echo-nest-remix.googlecode.com/

Large files are truncated; only the beginning of this file is shown below.

   1#!/usr/bin/env python
   2# -*- coding: utf-8 -*-
   3# Author: Ricardo Garcia Gonzalez
   4# Author: Danny Colligan
   5# Author: Benjamin Johnson
   6# Author: Vasyl' Vavrychuk
   7# Author: Witold Baryluk
   8# Author: Paweł Paprota
   9# Author: Gergely Imreh
  10# License: Public domain code
  11import cookielib
  12import ctypes
  13import datetime
  14import email.utils
  15import gzip
  16import htmlentitydefs
  17import httplib
  18import locale
  19import math
  20import netrc
  21import os
  22import os.path
  23import re
  24import socket
  25import string
  26import StringIO
  27import subprocess
  28import sys
  29import time
  30import urllib
  31import urllib2
  32import zlib
  33
  34# parse_qs was moved from the cgi module to the urlparse module recently.
  35try:
  36	from urlparse import parse_qs
  37except ImportError:
  38	from cgi import parse_qs
  39
  40std_headers = {
  41	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
  42	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  43	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  44	'Accept-Encoding': 'gzip, deflate',
  45	'Accept-Language': 'en-us,en;q=0.5',
  46}
  47
  48simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  49
  50def preferredencoding():
  51	"""Get preferred encoding.
  52
  53	Returns the best encoding scheme for the system, based on
  54	locale.getpreferredencoding() and some further tweaks.
  55	"""
  56	def yield_preferredencoding():
  57		try:
  58			pref = locale.getpreferredencoding()
  59			u'TEST'.encode(pref)
  60		except:
  61			pref = 'UTF-8'
  62		while True:
  63			yield pref
  64	return yield_preferredencoding().next()
  65
  66def htmlentity_transform(matchobj):
  67	"""Transforms an HTML entity to a Unicode character.
  68
  69	This function receives a match object and is intended to be used with
  70	the re.sub() function.
  71	"""
  72	entity = matchobj.group(1)
  73
  74	# Known non-numeric HTML entity
  75	if entity in htmlentitydefs.name2codepoint:
  76		return unichr(htmlentitydefs.name2codepoint[entity])
  77
  78	# Unicode character
  79	mobj = re.match(ur'(?u)#(x?\d+)', entity)
  80	if mobj is not None:
  81		numstr = mobj.group(1)
  82		if numstr.startswith(u'x'):
  83			base = 16
  84			numstr = u'0%s' % numstr
  85		else:
  86			base = 10
  87		return unichr(long(numstr, base))
  88
  89	# Unknown entity in name, return its literal representation
  90	return (u'&%s;' % entity)
  91
  92def sanitize_title(utitle):
  93	"""Sanitizes a video title so it could be used as part of a filename."""
  94	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  95	return utitle.replace(unicode(os.sep), u'%')
  96
  97def sanitize_open(filename, open_mode):
  98	"""Try to open the given filename, and slightly tweak it if this fails.
  99
 100	Attempts to open the given filename. If this fails, it tries to change
 101	the filename slightly, step by step, until it's either able to open it
 102	or it fails and raises a final exception, like the standard open()
 103	function.
 104
 105	It returns the tuple (stream, definitive_file_name).
 106	"""
 107	try:
 108		if filename == u'-':
 109			if sys.platform == 'win32':
 110				import msvcrt
 111				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 112			return (sys.stdout, filename)
 113		stream = open(filename, open_mode)
 114		return (stream, filename)
 115	except (IOError, OSError), err:
 116		# In case of error, try to remove win32 forbidden chars
 117		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 118
 119		# An exception here should be caught in the caller
 120		stream = open(filename, open_mode)
 121		return (stream, filename)
 122
 123def timeconvert(timestr):
 124    """Convert RFC 2822 defined time string into system timestamp"""
 125    timestamp = None
 126    timetuple = email.utils.parsedate_tz(timestr)
 127    if timetuple is not None:
 128        timestamp = email.utils.mktime_tz(timetuple)
 129    return timestamp
 130
 131class DownloadError(Exception):
 132	"""Download Error exception.
 133
 134	This exception may be thrown by FileDownloader objects if they are not
 135	configured to continue on errors. They will contain the appropriate
 136	error message.
 137	"""
 138	pass
 139
 140class SameFileError(Exception):
 141	"""Same File exception.
 142
 143	This exception will be thrown by FileDownloader objects if they detect
 144	multiple files would have to be downloaded to the same file on disk.
 145	"""
 146	pass
 147
 148class PostProcessingError(Exception):
 149	"""Post Processing exception.
 150
 151	This exception may be raised by PostProcessor's .run() method to
 152	indicate an error in the postprocessing task.
 153	"""
 154	pass
 155
 156class UnavailableVideoError(Exception):
 157	"""Unavailable Format exception.
 158
 159	This exception will be thrown when a video is requested
 160	in a format that is not available for that video.
 161	"""
 162	pass
 163
 164class ContentTooShortError(Exception):
 165	"""Content Too Short exception.
 166
 167	This exception may be raised by FileDownloader objects when a file they
 168	download is too small for what the server announced first, indicating
 169	the connection was probably interrupted.
 170	"""
 171	# Both in bytes
 172	downloaded = None
 173	expected = None
 174
 175	def __init__(self, downloaded, expected):
 176		self.downloaded = downloaded
 177		self.expected = expected
 178
 179class YoutubeDLHandler(urllib2.HTTPHandler):
 180	"""Handler for HTTP requests and responses.
 181
 182	This class, when installed with an OpenerDirector, automatically adds
 183	the standard headers to every HTTP request and handles gzipped and
 184	deflated responses from web servers. If compression is to be avoided in
 185	a particular request, the original request in the program code only has
 186	to include the HTTP header "Youtubedl-No-Compression", which will be
 187	removed before making the real request.
 188	
 189	Part of this code was copied from:
 190
 191	  http://techknack.net/python-urllib2-handlers/
 192	  
 193	Andrew Rowls, the author of that code, agreed to release it to the
 194	public domain.
 195	"""
 196
 197	@staticmethod
 198	def deflate(data):
 199		try:
 200			return zlib.decompress(data, -zlib.MAX_WBITS)
 201		except zlib.error:
 202			return zlib.decompress(data)
 203	
 204	@staticmethod
 205	def addinfourl_wrapper(stream, headers, url, code):
 206		if hasattr(urllib2.addinfourl, 'getcode'):
 207			return urllib2.addinfourl(stream, headers, url, code)
 208		ret = urllib2.addinfourl(stream, headers, url)
 209		ret.code = code
 210		return ret
 211	
 212	def http_request(self, req):
 213		for h in std_headers:
 214			if h in req.headers:
 215				del req.headers[h]
 216			req.add_header(h, std_headers[h])
 217		if 'Youtubedl-no-compression' in req.headers:
 218			if 'Accept-encoding' in req.headers:
 219				del req.headers['Accept-encoding']
 220			del req.headers['Youtubedl-no-compression']
 221		return req
 222
 223	def http_response(self, req, resp):
 224		old_resp = resp
 225		# gzip
 226		if resp.headers.get('Content-encoding', '') == 'gzip':
 227			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 228			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 229			resp.msg = old_resp.msg
 230		# deflate
 231		if resp.headers.get('Content-encoding', '') == 'deflate':
 232			gz = StringIO.StringIO(self.deflate(resp.read()))
 233			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 234			resp.msg = old_resp.msg
 235		return resp
 236
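# Illustrative sketch: a hypothetical helper (the name _example_install_opener
# and the ProxyHandler/CookieJar wiring are assumptions) showing how a
# YoutubeDLHandler would typically be installed into an OpenerDirector, so that
# every urllib2 request picks up std_headers and transparent gzip/deflate
# decoding, as described in the docstring above.
def _example_install_opener():
	"""Hedged example: build and install an opener that uses YoutubeDLHandler."""
	cookie_jar = cookielib.CookieJar()
	opener = urllib2.build_opener(
		urllib2.ProxyHandler(),
		urllib2.HTTPCookieProcessor(cookie_jar),
		YoutubeDLHandler())
	urllib2.install_opener(opener)
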
 237class FileDownloader(object):
 238	"""File Downloader class.
 239
 240	File downloader objects are the ones responsible for downloading the
 241	actual video file and writing it to disk if the user has requested
 242	it, among some other tasks. In most cases there should be one per
 243	program. Since, given a video URL, the downloader doesn't know how
 244	to extract all the needed information (that is the task of the
 245	InfoExtractors), it has to pass the URL to one of them.
 246
 247	For this, file downloader objects have a method that allows
 248	InfoExtractors to be registered in a given order. When it is passed
 249	a URL, the file downloader hands it to the first InfoExtractor it
 250	finds that reports being able to handle it. The InfoExtractor extracts
 251	all the information about the video or videos the URL refers to, and
 252	asks the FileDownloader to process the video information, possibly
 253	downloading the video.
 254
 255	File downloaders accept a lot of parameters. In order not to saturate
 256	the object constructor with arguments, it receives a dictionary of
 257	options instead. These options are available through the params
 258	attribute for the InfoExtractors to use. The FileDownloader also
 259	registers itself as the downloader in charge for the InfoExtractors
 260	that are added to it, so this is a "mutual registration".
 261
 262	Available options:
 263
 264	username:         Username for authentication purposes.
 265	password:         Password for authentication purposes.
 266	usenetrc:         Use netrc for authentication instead.
 267	quiet:            Do not print messages to stdout.
 268	forceurl:         Force printing final URL.
 269	forcetitle:       Force printing title.
 270	forcethumbnail:   Force printing thumbnail URL.
 271	forcedescription: Force printing description.
 272	forcefilename:    Force printing final filename.
 273	simulate:         Do not download the video files.
 274	format:           Video format code.
 275	format_limit:     Highest quality format to try.
 276	outtmpl:          Template for output names.
 277	ignoreerrors:     Do not stop on download errors.
 278	ratelimit:        Download speed limit, in bytes/sec.
 279	nooverwrites:     Prevent overwriting files.
 280	retries:          Number of times to retry for HTTP error 5xx
 281	continuedl:       Try to continue downloads if possible.
 282	noprogress:       Do not print the progress bar.
 283	playliststart:    Playlist item to start at.
 284	playlistend:      Playlist item to end at.
 285	logtostderr:      Log messages to stderr instead of stdout.
 286	consoletitle:     Display progress in console window's titlebar.
 287	nopart:           Do not use temporary .part files.
 288	updatetime:       Use the Last-modified header to set output file timestamps.
 289	"""
 290
 291	params = None
 292	_ies = []
 293	_pps = []
 294	_download_retcode = None
 295	_num_downloads = None
 296	_screen_file = None
 297
 298	def __init__(self, params):
 299		"""Create a FileDownloader object with the given options."""
 300		self._ies = []
 301		self._pps = []
 302		self._download_retcode = 0
 303		self._num_downloads = 0
 304		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 305		self.params = params
 306
 307	@staticmethod
 308	def pmkdir(filename):
 309		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
 310		components = filename.split(os.sep)
 311		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 312		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 313		for dir in aggregate:
 314			if not os.path.exists(dir):
 315				os.mkdir(dir)
 316
 317	@staticmethod
 318	def format_bytes(bytes):
 319		if bytes is None:
 320			return 'N/A'
 321		if type(bytes) is str:
 322			bytes = float(bytes)
 323		if bytes == 0.0:
 324			exponent = 0
 325		else:
 326			exponent = long(math.log(bytes, 1024.0))
 327		suffix = 'bkMGTPEZY'[exponent]
 328		converted = float(bytes) / float(1024**exponent)
 329		return '%.2f%s' % (converted, suffix)
 330
 331	@staticmethod
 332	def calc_percent(byte_counter, data_len):
 333		if data_len is None:
 334			return '---.-%'
 335		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 336
 337	@staticmethod
 338	def calc_eta(start, now, total, current):
 339		if total is None:
 340			return '--:--'
 341		dif = now - start
 342		if current == 0 or dif < 0.001: # One millisecond
 343			return '--:--'
 344		rate = float(current) / dif
 345		eta = long((float(total) - float(current)) / rate)
 346		(eta_mins, eta_secs) = divmod(eta, 60)
 347		if eta_mins > 99:
 348			return '--:--'
 349		return '%02d:%02d' % (eta_mins, eta_secs)
 350
 351	@staticmethod
 352	def calc_speed(start, now, bytes):
 353		dif = now - start
 354		if bytes == 0 or dif < 0.001: # One millisecond
 355			return '%10s' % '---b/s'
 356		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 357
 358	@staticmethod
 359	def best_block_size(elapsed_time, bytes):
 360		new_min = max(bytes / 2.0, 1.0)
 361		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 362		if elapsed_time < 0.001:
 363			return long(new_max)
 364		rate = bytes / elapsed_time
 365		if rate > new_max:
 366			return long(new_max)
 367		if rate < new_min:
 368			return long(new_min)
 369		return long(rate)
 370
 371	@staticmethod
 372	def parse_bytes(bytestr):
 373		"""Parse a string indicating a byte quantity into a long integer."""
 374		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 375		if matchobj is None:
 376			return None
 377		number = float(matchobj.group(1))
 378		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 379		return long(round(number * multiplier))
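	# Illustrative values (assumed examples): parse_bytes(u'50k') -> 51200,
	# parse_bytes(u'1.5m') -> 1572864, parse_bytes(u'500') -> 500, and a
	# malformed string such as u'fast' returns None. This is how a
	# human-readable size string maps to a byte count.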
 380
 381	def add_info_extractor(self, ie):
 382		"""Add an InfoExtractor object to the end of the list."""
 383		self._ies.append(ie)
 384		ie.set_downloader(self)
 385
 386	def add_post_processor(self, pp):
 387		"""Add a PostProcessor object to the end of the chain."""
 388		self._pps.append(pp)
 389		pp.set_downloader(self)
 390
 391	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
 392		"""Print message to stdout if not in quiet mode."""
 393		try:
 394			if not self.params.get('quiet', False):
 395				terminator = [u'\n', u''][skip_eol]
 396				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
 397			self._screen_file.flush()
 398		except (UnicodeEncodeError), err:
 399			if not ignore_encoding_errors:
 400				raise
 401
 402	def to_stderr(self, message):
 403		"""Print message to stderr."""
 404		print >>sys.stderr, message.encode(preferredencoding())
 405
 406	def to_cons_title(self, message):
 407		"""Set console/terminal window title to message."""
 408		if not self.params.get('consoletitle', False):
 409			return
 410		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 411			# c_wchar_p() might not be necessary if `message` is
 412			# already of type unicode()
 413			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 414		elif 'TERM' in os.environ:
 415			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 416
 417	def fixed_template(self):
 418		"""Checks if the output template is fixed."""
 419		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 420
 421	def trouble(self, message=None):
 422		"""Determine action to take when a download problem appears.
 423
 424		Depending on if the downloader has been configured to ignore
 425		download errors or not, this method may throw an exception or
 426		not when errors are found, after printing the message.
 427		"""
 428		if message is not None:
 429			self.to_stderr(message)
 430		if not self.params.get('ignoreerrors', False):
 431			raise DownloadError(message)
 432		self._download_retcode = 1
 433
 434	def slow_down(self, start_time, byte_counter):
 435		"""Sleep if the download speed is over the rate limit."""
 436		rate_limit = self.params.get('ratelimit', None)
 437		if rate_limit is None or byte_counter == 0:
 438			return
 439		now = time.time()
 440		elapsed = now - start_time
 441		if elapsed <= 0.0:
 442			return
 443		speed = float(byte_counter) / elapsed
 444		if speed > rate_limit:
 445			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 446
 447	def temp_name(self, filename):
 448		"""Returns a temporary filename for the given filename."""
 449		if self.params.get('nopart', False) or filename == u'-' or \
 450				(os.path.exists(filename) and not os.path.isfile(filename)):
 451			return filename
 452		return filename + u'.part'
 453
 454	def undo_temp_name(self, filename):
 455		if filename.endswith(u'.part'):
 456			return filename[:-len(u'.part')]
 457		return filename
 458
 459	def try_rename(self, old_filename, new_filename):
 460		try:
 461			if old_filename == new_filename:
 462				return
 463			os.rename(old_filename, new_filename)
 464		except (IOError, OSError), err:
 465			self.trouble(u'ERROR: unable to rename file')
 466	
 467	def try_utime(self, filename, last_modified_hdr):
 468		"""Try to set the last-modified time of the given file."""
 469		if last_modified_hdr is None:
 470			return
 471		if not os.path.isfile(filename):
 472			return
 473		timestr = last_modified_hdr
 474		if timestr is None:
 475			return
 476		filetime = timeconvert(timestr)
 477		if filetime is None:
 478			return
 479		try:
 480			os.utime(filename,(time.time(), filetime))
 481		except:
 482			pass
 483
 484	def report_destination(self, filename):
 485		"""Report destination filename."""
 486		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 487
 488	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 489		"""Report download progress."""
 490		if self.params.get('noprogress', False):
 491			return
 492		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 493				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 494		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 495				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 496
 497	def report_resuming_byte(self, resume_len):
 498		"""Report attempt to resume at given byte."""
 499		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 500
 501	def report_retry(self, count, retries):
 502		"""Report retry in case of HTTP error 5xx"""
 503		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 504
 505	def report_file_already_downloaded(self, file_name):
 506		"""Report file has already been fully downloaded."""
 507		try:
 508			self.to_screen(u'[download] %s has already been downloaded' % file_name)
 509		except (UnicodeEncodeError), err:
 510			self.to_screen(u'[download] The file has already been downloaded')
 511
 512	def report_unable_to_resume(self):
 513		"""Report it was impossible to resume download."""
 514		self.to_screen(u'[download] Unable to resume')
 515
 516	def report_finish(self):
 517		"""Report download finished."""
 518		if self.params.get('noprogress', False):
 519			self.to_screen(u'[download] Download completed')
 520		else:
 521			self.to_screen(u'')
 522
 523	def increment_downloads(self):
 524		"""Increment the ordinal that assigns a number to each file."""
 525		self._num_downloads += 1
 526
 527	def prepare_filename(self, info_dict):
 528		"""Generate the output filename."""
 529		try:
 530			template_dict = dict(info_dict)
 531			template_dict['epoch'] = unicode(long(time.time()))
 532			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 533			filename = self.params['outtmpl'] % template_dict
 534			return filename
 535		except (ValueError, KeyError), err:
 536			self.trouble(u'ERROR: invalid system charset or erroneous output template')
 537			return None
 538
 539	def process_info(self, info_dict):
 540		"""Process a single dictionary returned by an InfoExtractor."""
 541		filename = self.prepare_filename(info_dict)
 542		# Do nothing else if in simulate mode
 543		if self.params.get('simulate', False):
 544			# Forced printings
 545			if self.params.get('forcetitle', False):
 546				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 547			if self.params.get('forceurl', False):
 548				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 549			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 550				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 551			if self.params.get('forcedescription', False) and 'description' in info_dict:
 552				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 553			if self.params.get('forcefilename', False) and filename is not None:
 554				print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 555
 556			return
 557
 558		if filename is None:
 559			return
 560		if self.params.get('nooverwrites', False) and os.path.exists(filename):
 561			self.to_stderr(u'WARNING: file exists and will be skipped')
 562			return
 563
 564		try:
 565			self.pmkdir(filename)
 566		except (OSError, IOError), err:
 567			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
 568			return
 569
 570		try:
 571			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
 572		except (OSError, IOError), err:
 573			raise UnavailableVideoError
 574		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 575			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 576			return
 577		except (ContentTooShortError, ), err:
 578			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 579			return
 580
 581		if success:
 582			try:
 583				self.post_process(filename, info_dict)
 584			except (PostProcessingError), err:
 585				self.trouble(u'ERROR: postprocessing: %s' % str(err))
 586				return
 587
 588	def download(self, url_list):
 589		"""Download a given list of URLs."""
 590		if len(url_list) > 1 and self.fixed_template():
 591			raise SameFileError(self.params['outtmpl'])
 592
 593		for url in url_list:
 594			suitable_found = False
 595			for ie in self._ies:
 596				# Go to next InfoExtractor if not suitable
 597				if not ie.suitable(url):
 598					continue
 599
 600				# Suitable InfoExtractor found
 601				suitable_found = True
 602
 603				# Extract information from URL and process it
 604				ie.extract(url)
 605
 606				# Suitable InfoExtractor had been found; go to next URL
 607				break
 608
 609			if not suitable_found:
 610				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 611
 612		return self._download_retcode
 613
 614	def post_process(self, filename, ie_info):
 615		"""Run the postprocessing chain on the given file."""
 616		info = dict(ie_info)
 617		info['filepath'] = filename
 618		for pp in self._pps:
 619			info = pp.run(info)
 620			if info is None:
 621				break
 622
 623	def _download_with_rtmpdump(self, filename, url, player_url):
 624		self.report_destination(filename)
 625		tmpfilename = self.temp_name(filename)
 626
 627		# Check for rtmpdump first
 628		try:
 629			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 630		except (OSError, IOError):
 631			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 632			return False
 633
 634		# Download using rtmpdump. rtmpdump returns exit code 2 when
 635	# the connection was interrupted and resuming appears to be
 636		# possible. This is part of rtmpdump's normal usage, AFAIK.
 637		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 638		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 639		while retval == 2 or retval == 1:
 640			prevsize = os.path.getsize(tmpfilename)
 641			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 642			time.sleep(5.0) # This seems to be needed
 643			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 644			cursize = os.path.getsize(tmpfilename)
 645			if prevsize == cursize and retval == 1:
 646				break
 647		if retval == 0:
 648			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
 649			self.try_rename(tmpfilename, filename)
 650			return True
 651		else:
 652			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 653			return False
 654
 655	def _do_download(self, filename, url, player_url):
 656		# Check file already present
 657		if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
 658			self.report_file_already_downloaded(filename)
 659			return True
 660
 661		# Attempt to download using rtmpdump
 662		if url.startswith('rtmp'):
 663			return self._download_with_rtmpdump(filename, url, player_url)
 664
 665		tmpfilename = self.temp_name(filename)
 666		stream = None
 667		open_mode = 'wb'
 668
 669		# Do not include the Accept-Encoding header
 670		headers = {'Youtubedl-no-compression': 'True'}
 671		basic_request = urllib2.Request(url, None, headers)
 672		request = urllib2.Request(url, None, headers)
 673
 674		# Establish possible resume length
 675		if os.path.isfile(tmpfilename):
 676			resume_len = os.path.getsize(tmpfilename)
 677		else:
 678			resume_len = 0
 679
 680		# Request parameters in case of being able to resume
 681		if self.params.get('continuedl', False) and resume_len != 0:
 682			self.report_resuming_byte(resume_len)
 683			request.add_header('Range','bytes=%d-' % resume_len)
 684			open_mode = 'ab'
 685
 686		count = 0
 687		retries = self.params.get('retries', 0)
 688		while count <= retries:
 689			# Establish connection
 690			try:
 691				data = urllib2.urlopen(request)
 692				break
 693			except (urllib2.HTTPError, ), err:
 694				if (err.code < 500 or err.code >= 600) and err.code != 416:
 695					# Unexpected HTTP error
 696					raise
 697				elif err.code == 416:
 698					# Unable to resume (requested range not satisfiable)
 699					try:
 700						# Open the connection again without the range header
 701						data = urllib2.urlopen(basic_request)
 702						content_length = data.info()['Content-Length']
 703					except (urllib2.HTTPError, ), err:
 704						if err.code < 500 or err.code >= 600:
 705							raise
 706					else:
 707						# Examine the reported length
 708						if (content_length is not None and
 709						    (resume_len - 100 < long(content_length) < resume_len + 100)):
 710							# The file had already been fully downloaded.
 711							# Explanation to the above condition: in issue #175 it was revealed that
 712							# YouTube sometimes adds or removes a few bytes from the end of the file,
 713							# changing the file size slightly and causing problems for some users. So
 714							# I decided to implement a suggested change and consider the file
 715							# completely downloaded if the file size differs less than 100 bytes from
 716							# the one on the hard drive.
 717							self.report_file_already_downloaded(filename)
 718							self.try_rename(tmpfilename, filename)
 719							return True
 720						else:
 721							# The length does not match, we start the download over
 722							self.report_unable_to_resume()
 723							open_mode = 'wb'
 724							break
 725			# Retry
 726			count += 1
 727			if count <= retries:
 728				self.report_retry(count, retries)
 729
 730		if count > retries:
 731			self.trouble(u'ERROR: giving up after %s retries' % retries)
 732			return False
 733
 734		data_len = data.info().get('Content-length', None)
 735		if data_len is not None:
 736			data_len = long(data_len) + resume_len
 737		data_len_str = self.format_bytes(data_len)
 738		byte_counter = 0 + resume_len
 739		block_size = 1024
 740		start = time.time()
 741		while True:
 742			# Download and write
 743			before = time.time()
 744			data_block = data.read(block_size)
 745			after = time.time()
 746			if len(data_block) == 0:
 747				break
 748			byte_counter += len(data_block)
 749
 750			# Open file just in time
 751			if stream is None:
 752				try:
 753					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 754					filename = self.undo_temp_name(tmpfilename)
 755					self.report_destination(filename)
 756				except (OSError, IOError), err:
 757					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 758					return False
 759			try:
 760				stream.write(data_block)
 761			except (IOError, OSError), err:
 762				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 763				return False
 764			block_size = self.best_block_size(after - before, len(data_block))
 765
 766			# Progress message
 767			percent_str = self.calc_percent(byte_counter, data_len)
 768			eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 769			speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 770			self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 771
 772			# Apply rate limit
 773			self.slow_down(start, byte_counter - resume_len)
 774
 775		stream.close()
 776		self.report_finish()
 777		if data_len is not None and byte_counter != data_len:
 778			raise ContentTooShortError(byte_counter, long(data_len))
 779		self.try_rename(tmpfilename, filename)
 780
 781		# Update file modification time
 782		if self.params.get('updatetime', True):
 783			self.try_utime(filename, data.info().get('last-modified', None))
 784
 785		return True
 786
 787class InfoExtractor(object):
 788	"""Information Extractor class.
 789
 790	Information extractors are the classes that, given a URL, extract
 791	information from the video (or videos) the URL refers to. This
 792	information includes the real video URL, the video title and simplified
 793	title, author and others. The information is stored in a dictionary
 794	which is then passed to the FileDownloader. The FileDownloader
 795	processes this information possibly downloading the video to the file
 796	processes this information, possibly downloading the video to the file
 797	the following fields:
 798
 799	id:		Video identifier.
 800	url:		Final video URL.
 801	uploader:	Nickname of the video uploader.
 802	title:		Literal title.
 803	stitle:		Simplified title.
 804	ext:		Video filename extension.
 805	format:		Video format.
 806	player_url:	SWF Player URL (may be None).
 807
 808	The following fields are optional. Their primary purpose is to allow
 809	youtube-dl to serve as the backend for a video search function, such
 810	as the one in youtube2mp3.  They are only used when their respective
 811	forced printing functions are called:
 812
 813	thumbnail:	Full URL to a video thumbnail image.
 814	description:	One-line video description.
 815
 816	Subclasses of this one should re-define the _real_initialize() and
 817	_real_extract() methods, as well as the suitable() static method.
 818	Probably, they should also be instantiated and added to the main
 819	downloader.
 820	"""
 821
 822	_ready = False
 823	_downloader = None
 824
 825	def __init__(self, downloader=None):
 826		"""Constructor. Receives an optional downloader."""
 827		self._ready = False
 828		self.set_downloader(downloader)
 829
 830	@staticmethod
 831	def suitable(url):
 832		"""Receives a URL and returns True if suitable for this IE."""
 833		return False
 834
 835	def initialize(self):
 836		"""Initializes an instance (authentication, etc)."""
 837		if not self._ready:
 838			self._real_initialize()
 839			self._ready = True
 840
 841	def extract(self, url):
 842		"""Extracts URL information and returns it in list of dicts."""
 843		self.initialize()
 844		return self._real_extract(url)
 845
 846	def set_downloader(self, downloader):
 847		"""Sets the downloader for this IE."""
 848		self._downloader = downloader
 849
 850	def _real_initialize(self):
 851		"""Real initialization process. Redefine in subclasses."""
 852		pass
 853
 854	def _real_extract(self, url):
 855		"""Real extraction process. Redefine in subclasses."""
 856		pass
 857
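# Illustrative sketch: _ExampleIE is a hypothetical, minimal subclass (its URL
# pattern and field values are made up) showing the shape the docstring above
# asks for: redefine suitable(), _real_initialize() and _real_extract(), and
# hand the required fields to the downloader via process_info().
class _ExampleIE(InfoExtractor):
	"""Hedged example extractor for a fictitious site."""

	_VALID_URL = r'(?:http://)?example\.invalid/video/(\w+)'

	@staticmethod
	def suitable(url):
		return (re.match(_ExampleIE._VALID_URL, url) is not None)

	def _real_initialize(self):
		return

	def _real_extract(self, url):
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		self._downloader.increment_downloads()
		self._downloader.process_info({
			'id':		mobj.group(1).decode('utf-8'),
			'url':		u'http://example.invalid/video.mp4', # placeholder media URL
			'uploader':	u'NA',
			'upload_date':	u'NA',
			'title':	u'Example video',
			'stitle':	u'Example_video',
			'ext':		u'mp4',
			'format':	u'NA',
			'player_url':	None,
		})
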
 858class YoutubeIE(InfoExtractor):
 859	"""Information extractor for youtube.com."""
 860
 861	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 862	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 863	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
 864	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 865	_NETRC_MACHINE = 'youtube'
 866	# Listed in order of quality
 867	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
 868	_video_extensions = {
 869		'13': '3gp',
 870		'17': 'mp4',
 871		'18': 'mp4',
 872		'22': 'mp4',
 873		'37': 'mp4',
 874		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
 875		'43': 'webm',
 876		'45': 'webm',
 877	}
 878
 879	@staticmethod
 880	def suitable(url):
 881		return (re.match(YoutubeIE._VALID_URL, url) is not None)
 882
 883	def report_lang(self):
 884		"""Report attempt to set language."""
 885		self._downloader.to_screen(u'[youtube] Setting language')
 886
 887	def report_login(self):
 888		"""Report attempt to log in."""
 889		self._downloader.to_screen(u'[youtube] Logging in')
 890
 891	def report_age_confirmation(self):
 892		"""Report attempt to confirm age."""
 893		self._downloader.to_screen(u'[youtube] Confirming age')
 894
 895	def report_video_webpage_download(self, video_id):
 896		"""Report attempt to download video webpage."""
 897		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
 898
 899	def report_video_info_webpage_download(self, video_id):
 900		"""Report attempt to download video info webpage."""
 901		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
 902
 903	def report_information_extraction(self, video_id):
 904		"""Report attempt to extract video information."""
 905		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
 906
 907	def report_unavailable_format(self, video_id, format):
 908		"""Report extracted video URL."""
 909		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
 910
 911	def report_rtmp_download(self):
 912		"""Indicate the download will use the RTMP protocol."""
 913		self._downloader.to_screen(u'[youtube] RTMP download detected')
 914
 915	def _real_initialize(self):
 916		if self._downloader is None:
 917			return
 918
 919		username = None
 920		password = None
 921		downloader_params = self._downloader.params
 922
 923		# Attempt to use provided username and password or .netrc data
 924		if downloader_params.get('username', None) is not None:
 925			username = downloader_params['username']
 926			password = downloader_params['password']
 927		elif downloader_params.get('usenetrc', False):
 928			try:
 929				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 930				if info is not None:
 931					username = info[0]
 932					password = info[2]
 933				else:
 934					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 935			except (IOError, netrc.NetrcParseError), err:
 936				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 937				return
 938
 939		# Set language
 940		request = urllib2.Request(self._LANG_URL)
 941		try:
 942			self.report_lang()
 943			urllib2.urlopen(request).read()
 944		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 945			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 946			return
 947
 948		# No authentication to be performed
 949		if username is None:
 950			return
 951
 952		# Log in
 953		login_form = {
 954				'current_form': 'loginForm',
 955				'next':		'/',
 956				'action_login':	'Log In',
 957				'username':	username,
 958				'password':	password,
 959				}
 960		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
 961		try:
 962			self.report_login()
 963			login_results = urllib2.urlopen(request).read()
 964			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 965				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 966				return
 967		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 968			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 969			return
 970
 971		# Confirm age
 972		age_form = {
 973				'next_url':		'/',
 974				'action_confirm':	'Confirm',
 975				}
 976		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
 977		try:
 978			self.report_age_confirmation()
 979			age_results = urllib2.urlopen(request).read()
 980		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 981			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 982			return
 983
 984	def _real_extract(self, url):
 985		# Extract video id from URL
 986		mobj = re.match(self._VALID_URL, url)
 987		if mobj is None:
 988			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 989			return
 990		video_id = mobj.group(2)
 991
 992		# Get video webpage
 993		self.report_video_webpage_download(video_id)
 994		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
 995		try:
 996			video_webpage = urllib2.urlopen(request).read()
 997		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 998			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
 999			return
1000
1001		# Attempt to extract SWF player URL
1002		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1003		if mobj is not None:
1004			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1005		else:
1006			player_url = None
1007
1008		# Get video info
1009		self.report_video_info_webpage_download(video_id)
1010		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1011			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1012					   % (video_id, el_type))
1013			request = urllib2.Request(video_info_url)
1014			try:
1015				video_info_webpage = urllib2.urlopen(request).read()
1016				video_info = parse_qs(video_info_webpage)
1017				if 'token' in video_info:
1018					break
1019			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1020				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1021				return
1022		if 'token' not in video_info:
1023			if 'reason' in video_info:
1024				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1025			else:
1026				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1027			return
1028
1029		# Start extracting information
1030		self.report_information_extraction(video_id)
1031
1032		# uploader
1033		if 'author' not in video_info:
1034			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1035			return
1036		video_uploader = urllib.unquote_plus(video_info['author'][0])
1037
1038		# title
1039		if 'title' not in video_info:
1040			self._downloader.trouble(u'ERROR: unable to extract video title')
1041			return
1042		video_title = urllib.unquote_plus(video_info['title'][0])
1043		video_title = video_title.decode('utf-8')
1044		video_title = sanitize_title(video_title)
1045
1046		# simplified title
1047		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1048		simple_title = simple_title.strip(ur'_')
1049
1050		# thumbnail image
1051		if 'thumbnail_url' not in video_info:
1052			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1053			video_thumbnail = ''
1054		else:	# don't panic if we can't find it
1055			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1056
1057		# upload date
1058		upload_date = u'NA'
1059		mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1060		if mobj is not None:
1061			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1062			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1063			for expression in format_expressions:
1064				try:
1065					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1066				except:
1067					pass
1068
1069		# description
1070		video_description = 'No description available.'
1071		if self._downloader.params.get('forcedescription', False):
1072			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
1073			if mobj is not None:
1074				video_description = mobj.group(1)
1075
1076		# token
1077		video_token = urllib.unquote_plus(video_info['token'][0])
1078
1079		# Decide which formats to download
1080		req_format = self._downloader.params.get('format', None)
1081
1082		if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]:
1083			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
1084			format_limit = self._downloader.params.get('format_limit', None)
1085			if format_limit is not None and format_limit in self._available_formats:
1086				format_list = self._available_formats[self._available_formats.index(format_limit):]
1087			else:
1088				format_list = self._available_formats
1089			existing_formats = [x for x in format_list if x in url_map]
1090			if len(existing_formats) == 0:
1091				self._downloader.trouble(u'ERROR: no known formats available for video')
1092				return
1093			if req_format is None:
1094				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1095			elif req_format == '-1':
1096				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1097			else:
1098				# Specific format
1099				if req_format not in url_map:
1100					self._downloader.trouble(u'ERROR: requested format not available')
1101					return
1102				video_url_list = [(req_format, url_map[req_format])] # Specific format
1103
1104		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1105			self.report_rtmp_download()
1106			video_url_list = [(None, video_info['conn'][0])]
1107
1108		else:
1109			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
1110			return
1111
1112		for format_param, video_real_url in video_url_list:
1113			# At this point we have a new video
1114			self._downloader.increment_downloads()
1115
1116			# Extension
1117			video_extension = self._video_extensions.get(format_param, 'flv')
1118
 1119			# Find the video URL in fmt_url_map or conn parameters
1120			try:
1121				# Process video information
1122				self._downloader.process_info({
1123					'id':		video_id.decode('utf-8'),
1124					'url':		video_real_url.decode('utf-8'),
1125					'uploader':	video_uploader.decode('utf-8'),
1126					'upload_date':	upload_date,
1127					'title':	video_title,
1128					'stitle':	simple_title,
1129					'ext':		video_extension.decode('utf-8'),
1130					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
1131					'thumbnail':	video_thumbnail.decode('utf-8'),
1132					'description':	video_description.decode('utf-8'),
1133					'player_url':	player_url,
1134				})
1135			except UnavailableVideoError, err:
1136				self._downloader.trouble(u'\nERROR: unable to download video')
1137
1138
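# Illustrative sketch: _example_download is a hypothetical helper (the option
# values are assumptions) showing the "mutual registration" described in the
# FileDownloader docstring: the downloader is created with an options dict,
# an InfoExtractor is added to it, and a list of URLs is handed to download().
def _example_download(urls):
	"""Hedged example: download the given URLs with a YoutubeIE-backed downloader."""
	fd = FileDownloader({
		'outtmpl': u'%(stitle)s-%(id)s.%(ext)s',
		'retries': 10,
		'continuedl': True,
		'quiet': False,
	})
	fd.add_info_extractor(YoutubeIE())
	return fd.download(urls)
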
1139class MetacafeIE(InfoExtractor):
1140	"""Information Extractor for metacafe.com."""
1141
1142	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1143	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1144	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1145	_youtube_ie = None
1146
1147	def __init__(self, youtube_ie, downloader=None):
1148		InfoExtractor.__init__(self, downloader)
1149		self._youtube_ie = youtube_ie
1150
1151	@staticmethod
1152	def suitable(url):
1153		return (re.match(MetacafeIE._VALID_URL, url) is not None)
1154
1155	def report_disclaimer(self):
1156		"""Report disclaimer retrieval."""
1157		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1158
1159	def report_age_confirmation(self):
1160		"""Report attempt to confirm age."""
1161		self._downloader.to_screen(u'[metacafe] Confirming age')
1162
1163	def report_download_webpage(self, video_id):
1164		"""Report webpage download."""
1165		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1166
1167	def report_extraction(self, video_id):
1168		"""Report information extraction."""
1169		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1170
1171	def _real_initialize(self):
1172		# Retrieve disclaimer
1173		request = urllib2.Request(self._DISCLAIMER)
1174		try:
1175			self.report_disclaimer()
1176			disclaimer = urllib2.urlopen(request).read()
1177		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1178			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1179			return
1180
1181		# Confirm age
1182		disclaimer_form = {
1183			'filters': '0',
1184			'submit': "Continue - I'm over 18",
1185			}
1186		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1187		try:
1188			self.report_age_confirmation()
1189			disclaimer = urllib2.urlopen(request).read()
1190		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1191			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1192			return
1193
1194	def _real_extract(self, url):
1195		# Extract id and simplified title from URL
1196		mobj = re.match(self._VALID_URL, url)
1197		if mobj is None:
1198			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1199			return
1200
1201		video_id = mobj.group(1)
1202
1203		# Check if video comes from YouTube
1204		mobj2 = re.match(r'^yt-(.*)$', video_id)
1205		if mobj2 is not None:
1206			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1207			return
1208
1209		# At this point we have a new video
1210		self._downloader.increment_downloads()
1211
1212		simple_title = mobj.group(2).decode('utf-8')
1213
1214		# Retrieve video webpage to extract further information
1215		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1216		try:
1217			self.report_download_webpage(video_id)
1218			webpage = urllib2.urlopen(request).read()
1219		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 1220			self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1221			return
1222
1223		# Extract URL, uploader and title from webpage
1224		self.report_extraction(video_id)
1225		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1226		if mobj is not None:
1227			mediaURL = urllib.unquote(mobj.group(1))
1228			video_extension = mediaURL[-3:]
1229
1230			# Extract gdaKey if available
1231			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1232			if mobj is None:
1233				video_url = mediaURL
1234			else:
1235				gdaKey = mobj.group(1)
1236				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1237		else:
1238			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1239			if mobj is None:
1240				self._downloader.trouble(u'ERROR: unable to extract media URL')
1241				return
1242			vardict = parse_qs(mobj.group(1))
1243			if 'mediaData' not in vardict:
1244				self._downloader.trouble(u'ERROR: unable to extract media URL')
1245				return
1246			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1247			if mobj is None:
1248				self._downloader.trouble(u'ERROR: unable to extract media URL')
1249				return
1250			mediaURL = mobj.group(1).replace('\\/', '/')
1251			video_extension = mediaURL[-3:]
1252			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1253
1254		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1255		if mobj is None:
1256			self._downloader.trouble(u'ERROR: unable to extract title')
1257			return
1258		video_title = mobj.group(1).decode('utf-8')
1259		video_title = sanitize_title(video_title)
1260
1261		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1262		if mobj is None:
1263			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1264			return
1265		video_uploader = mobj.group(1)
1266
1267		try:
1268			# Process video information
1269			self._downloader.process_info({
1270				'id':		video_id.decode('utf-8'),
1271				'url':		video_url.decode('utf-8'),
1272				'uploader':	video_uploader.decode('utf-8'),
1273				'upload_date':	u'NA',
1274				'title':	video_title,
1275				'stitle':	simple_title,
1276				'ext':		video_extension.decode('utf-8'),
1277				'format':	u'NA',
1278				'player_url':	None,
1279			})
1280		except UnavailableVideoError:
1281			self._downloader.trouble(u'\nERROR: unable to download video')
1282
1283
1284class DailymotionIE(InfoExtractor):
1285	"""Information Extractor for Dailymotion"""
1286
1287	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1288
1289	def __init__(self, downloader=None):
1290		InfoExtractor.__init__(self, downloader)
1291
1292	@staticmethod
1293	def suitable(url):
1294		return (re.match(DailymotionIE._VALID_URL, url) is not None)
1295
1296	def report_download_webpage(self, video_id):
1297		"""Report webpage download."""
1298		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1299
1300	def report_extraction(self, video_id):
1301		"""Report information extraction."""
1302		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1303
1304	def _real_initialize(self):
1305		return
1306
1307	def _real_extract(self, url):
1308		# Extract id and simplified title from URL
1309		mobj = re.match(self._VALID_URL, url)
1310		if mobj is None:
1311			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1312			return
1313
1314		# At this point we have a new video
1315		self._downloader.increment_downloads()
1316		video_id = mobj.group(1)
1317
1318		simple_title = mobj.group(2).decode('utf-8')
1319		video_extension = 'flv'
1320
1321		# Retrieve video webpage to extract further information
1322		request = urllib2.Request(url)
1323		try:
1324			self.report_download_webpage(video_id)
1325			webpage = urllib2.urlopen(request).read()
1326		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 1327			self._downloader.trouble(u'ERROR: unable to retrieve video webpage: %s' % str(err))
1328			return
1329
1330		# Extract URL, uploader and title from webpage
1331		self.report_extraction(video_id)
1332		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1333		if mobj is None:
1334			self._downloader.trouble(u'ERROR: unable to extract media URL')
1335			return
1336		mediaURL = urllib.unquote(mobj.group(1))
1337
 1338		# If the media URL is relative, http://www.dailymotion.com/ would need to be prepended here
1339
1340		video_url = mediaURL
1341
1342		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1343		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1344		if mobj is None:
1345			self._downloader.trouble(u'ERROR: unable to extract title')
1346			return
1347		video_title = mobj.group(1).decode('utf-8')
1348		video_title = sanitize_title(video_title)
1349
1350		mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
1351		if mobj is None:
1352			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1353			return
1354		video_uploader = mobj.group(1)
1355
1356		try:
1357			# Process video information
1358			self._downloader.process_info({
1359				'id':		video_id.decode('utf-8'),
1360				'url':		video_url.decode('utf-8'),
1361				'uploader':	video_uploader.decode('utf-8'),
1362				'upload_date':	u'NA',
1363				'title':	video_title,
1364				'stitle':	simple_title,
1365				'ext':		video_extension.decode('utf-8'),
1366				'format':	u'NA',
1367				'player_url':	None,
1368			})
1369		except UnavailableVideoError:
1370			self._downloader.trouble(u'\nERROR: unable to download video')
1371
1372class GoogleIE(InfoExtractor):
1373	"""Information extractor for video.google.com."""
1374
1375	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1376
1377	def __init__(self, downloader=None):
1378		InfoExtractor.__init__(self, downloader)
1379
1380	@staticmethod
1381	def suitable(url):
1382		return (re.match(GoogleIE._VALID_URL, url) is not None)
1383
1384	def report_download_webpage(self, video_id):
1385		"""Report webpage download."""
1386		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1387
1388	def report_extraction(self, video_id):
1389		"""Report information extraction."""
1390		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1391
1392	def _real_initialize(self):
1393		return
1394
1395	def _real_extract(self, url):
1396		# Extract id from URL
1397		mobj = re.match(self._VALID_URL, url)
1398		if mobj is None:
1399			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1400			return
1401
1402		# At this point we have a new video
1403		self._downloader.increment_downloads()
1404		video_id = mobj.group(1)
1405
1406		video_extension = 'mp4'
1407
1408		# Retrieve video webpage to extract further information
1409		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1410		try:
1411			self.report_download_webpage(video_id)
1412			webpage = urllib2.urlopen(request).read()
1413		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1414			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1415			return
1416
1417		# Extract URL, uploader, and title from webpage
1418		self.report_extraction(video_id)
1419		mobj = re.search(r"download_url:'([^']+)'", webpage)
1420		if mobj is None:
1421			video_extension = 'flv'
1422			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1423		if mobj is None:
1424			self._downloader.trouble(u'ERROR: unable to extract media URL')
1425			return
1426		mediaURL = urllib.unquote(mobj.group(1))
1427		mediaURL = mediaURL.replace('\\x3d', '\x3d')
1428		mediaURL = mediaURL.replace('\\x26', '\x26')
1429
1430		video_url = mediaURL
1431
1432		mobj = re.search(r'<title>(.*)</title>', webpage)
1433		if mobj is None:
1434			self._downloader.trouble(u'ERROR: unable to extract title')
1435			return
1436		video_title = mobj.group(1).decode('utf-8')
1437		video_title = sanitize_title(video_title)
1438		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1439
1440		# Extract video description
1441		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1442		if mobj is None:
1443			self._downloader.trouble(u'ERROR: unable to extract video description')
1444			return
1445		video_description = mobj.group(1).decode('utf-8')
1446		if not video_description:
1447			video_description = 'No description available.'
1448
1449		# Extract video thumbnail
1450		if self._downloader.params.get('forcethumbnail', False):
1451			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1452			try:
1453				webpage = urllib2.urlopen(request).read()
1454			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1455				self._downloader.trouble(u'ERROR: Unable to r…
