PageRenderTime 182ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/youtube_dl/extractor/youtube.py

https://gitlab.com/angelbirth/youtube-dl
Python | 1032 lines | 948 code | 48 blank | 36 comment | 35 complexity | 918139049aa213911173d9b9a12015e7 MD5 | raw file
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import itertools
  4. import json
  5. import os.path
  6. import random
  7. import re
  8. import time
  9. import traceback
  10. from .common import InfoExtractor, SearchInfoExtractor
  11. from ..jsinterp import JSInterpreter
  12. from ..swfinterp import SWFInterpreter
  13. from ..compat import (
  14. compat_chr,
  15. compat_parse_qs,
  16. compat_urllib_parse_unquote,
  17. compat_urllib_parse_unquote_plus,
  18. compat_urllib_parse_urlencode,
  19. compat_urllib_parse_urlparse,
  20. compat_urlparse,
  21. compat_str,
  22. )
  23. from ..utils import (
  24. clean_html,
  25. error_to_compat_str,
  26. ExtractorError,
  27. float_or_none,
  28. get_element_by_attribute,
  29. get_element_by_id,
  30. int_or_none,
  31. mimetype2ext,
  32. orderedSet,
  33. parse_duration,
  34. remove_quotes,
  35. remove_start,
  36. sanitized_Request,
  37. smuggle_url,
  38. str_to_int,
  39. unescapeHTML,
  40. unified_strdate,
  41. unsmuggle_url,
  42. uppercase_escape,
  43. urlencode_postdata,
  44. ISO3166Utils,
  45. )
  46. class YoutubeBaseInfoExtractor(InfoExtractor):
  47. """Provide base functions for Youtube extractors"""
  48. _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  49. _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  50. _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
  51. _NETRC_MACHINE = 'youtube'
  52. # If True it will raise an error if no login info is provided
  53. _LOGIN_REQUIRED = False
  54. def _set_language(self):
  55. self._set_cookie(
  56. '.youtube.com', 'PREF', 'f1=50000000&hl=en',
  57. # YouTube sets the expire time to about two months
  58. expire_time=time.time() + 2 * 30 * 24 * 3600)
  59. def _ids_to_results(self, ids):
  60. return [
  61. self.url_result(vid_id, 'Youtube', video_id=vid_id)
  62. for vid_id in ids]
  63. def _login(self):
  64. """
  65. Attempt to log in to YouTube.
  66. True is returned if successful or skipped.
  67. False is returned if login failed.
  68. If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  69. """
  70. (username, password) = self._get_login_info()
  71. # No authentication to be performed
  72. if username is None:
  73. if self._LOGIN_REQUIRED:
  74. raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
  75. return True
  76. login_page = self._download_webpage(
  77. self._LOGIN_URL, None,
  78. note='Downloading login page',
  79. errnote='unable to fetch login page', fatal=False)
  80. if login_page is False:
  81. return
  82. login_form = self._hidden_inputs(login_page)
  83. login_form.update({
  84. 'checkConnection': 'youtube',
  85. 'Email': username,
  86. 'Passwd': password,
  87. })
  88. login_results = self._download_webpage(
  89. self._PASSWORD_CHALLENGE_URL, None,
  90. note='Logging in', errnote='unable to log in', fatal=False,
  91. data=urlencode_postdata(login_form))
  92. if login_results is False:
  93. return False
  94. error_msg = self._html_search_regex(
  95. r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
  96. login_results, 'error message', default=None)
  97. if error_msg:
  98. raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
  99. if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
  100. raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
  101. # Two-Factor
  102. # TODO add SMS and phone call support - these require making a request and then prompting the user
  103. if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
  104. tfa_code = self._get_tfa_info('2-step verification code')
  105. if not tfa_code:
  106. self._downloader.report_warning(
  107. 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
  108. '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
  109. return False
  110. tfa_code = remove_start(tfa_code, 'G-')
  111. tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
  112. tfa_form_strs.update({
  113. 'Pin': tfa_code,
  114. 'TrustDevice': 'on',
  115. })
  116. tfa_data = urlencode_postdata(tfa_form_strs)
  117. tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
  118. tfa_results = self._download_webpage(
  119. tfa_req, None,
  120. note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
  121. if tfa_results is False:
  122. return False
  123. if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
  124. self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
  125. return False
  126. if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
  127. self._downloader.report_warning('unable to log in - did the page structure change?')
  128. return False
  129. if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
  130. self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
  131. return False
  132. if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
  133. self._downloader.report_warning('unable to log in: bad username or password')
  134. return False
  135. return True
  136. def _real_initialize(self):
  137. if self._downloader is None:
  138. return
  139. self._set_language()
  140. if not self._login():
  141. return
  142. class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
  143. # Extract entries from page with "Load more" button
  144. def _entries(self, page, playlist_id):
  145. more_widget_html = content_html = page
  146. for page_num in itertools.count(1):
  147. for entry in self._process_page(content_html):
  148. yield entry
  149. mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
  150. if not mobj:
  151. break
  152. more = self._download_json(
  153. 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
  154. 'Downloading page #%s' % page_num,
  155. transform_source=uppercase_escape)
  156. content_html = more['content_html']
  157. if not content_html.strip():
  158. # Some webpages show a "Load more" button but they don't
  159. # have more videos
  160. break
  161. more_widget_html = more['load_more_widget_html']
  162. class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
  163. def _process_page(self, content):
  164. for video_id, video_title in self.extract_videos_from_page(content):
  165. yield self.url_result(video_id, 'Youtube', video_id, video_title)
  166. def extract_videos_from_page(self, page):
  167. ids_in_page = []
  168. titles_in_page = []
  169. for mobj in re.finditer(self._VIDEO_RE, page):
  170. # The link with index 0 is not the first video of the playlist (not sure if still actual)
  171. if 'index' in mobj.groupdict() and mobj.group('id') == '0':
  172. continue
  173. video_id = mobj.group('id')
  174. video_title = unescapeHTML(mobj.group('title'))
  175. if video_title:
  176. video_title = video_title.strip()
  177. try:
  178. idx = ids_in_page.index(video_id)
  179. if video_title and not titles_in_page[idx]:
  180. titles_in_page[idx] = video_title
  181. except ValueError:
  182. ids_in_page.append(video_id)
  183. titles_in_page.append(video_title)
  184. return zip(ids_in_page, titles_in_page)
  185. class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
  186. def _process_page(self, content):
  187. for playlist_id in orderedSet(re.findall(
  188. r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
  189. content)):
  190. yield self.url_result(
  191. 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
  192. def _real_extract(self, url):
  193. playlist_id = self._match_id(url)
  194. webpage = self._download_webpage(url, playlist_id)
  195. title = self._og_search_title(webpage, fatal=False)
  196. return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
  197. class YoutubeIE(YoutubeBaseInfoExtractor):
  198. IE_DESC = 'YouTube.com'
  199. _VALID_URL = r"""(?x)^
  200. (
  201. (?:https?://|//) # http(s):// or protocol-independent URL
  202. (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
  203. (?:www\.)?deturl\.com/www\.youtube\.com/|
  204. (?:www\.)?pwnyoutube\.com/|
  205. (?:www\.)?yourepeat\.com/|
  206. tube\.majestyc\.net/|
  207. youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
  208. (?:.*?\#/)? # handle anchor (#/) redirect urls
  209. (?: # the various things that can precede the ID:
  210. (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
  211. |(?: # or the v= param in all its forms
  212. (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
  213. (?:\?|\#!?) # the params delimiter ? or # or #!
  214. (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
  215. v=
  216. )
  217. ))
  218. |(?:
  219. youtu\.be| # just youtu.be/xxxx
  220. vid\.plus| # or vid.plus/xxxx
  221. zwearz\.com/watch| # or zwearz.com/watch/xxxx
  222. )/
  223. |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
  224. )
  225. )? # all until now is optional -> you can pass the naked ID
  226. ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
  227. (?!.*?\blist=) # combined list/video URLs are handled by the playlist IE
  228. (?(1).+)? # if we found the ID, everything can follow
  229. $"""
  230. _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
  231. _formats = {
  232. '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
  233. '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
  234. '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
  235. '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
  236. '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
  237. '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  238. '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  239. '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  240. # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
  241. '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
  242. '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  243. '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
  244. '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
  245. '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
  246. '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
  247. '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
  248. '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  249. '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
  250. # 3D videos
  251. '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
  252. '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
  253. '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
  254. '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
  255. '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
  256. '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
  257. '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
  258. # Apple HTTP Live Streaming
  259. '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  260. '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  261. '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
  262. '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
  263. '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
  264. '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
  265. '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
  266. '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
  267. # DASH mp4 video
  268. '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  269. '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  270. '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  271. '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  272. '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  273. '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40}, # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)
  274. '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  275. '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  276. '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
  277. '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60, 'preference': -40},
  278. '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264', 'preference': -40},
  279. # Dash mp4 audio
  280. '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
  281. '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
  282. '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
  283. '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
  284. '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
  285. # Dash webm
  286. '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  287. '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  288. '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  289. '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  290. '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  291. '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
  292. '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9', 'preference': -40},
  293. '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  294. '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  295. '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  296. '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  297. '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  298. '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  299. '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  300. '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  301. # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
  302. '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  303. '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
  304. '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
  305. '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
  306. '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'preference': -40},
  307. '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60, 'preference': -40},
  308. # Dash webm audio
  309. '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
  310. '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256, 'preference': -50},
  311. # Dash webm audio with opus inside
  312. '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50, 'preference': -50},
  313. '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70, 'preference': -50},
  314. '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160, 'preference': -50},
  315. # RTMP (unnamed)
  316. '_rtmp': {'protocol': 'rtmp'},
  317. }
  318. _SUBTITLE_FORMATS = ('ttml', 'vtt')
  319. IE_NAME = 'youtube'
  320. _TESTS = [
  321. {
  322. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
  323. 'info_dict': {
  324. 'id': 'BaW_jenozKc',
  325. 'ext': 'mp4',
  326. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  327. 'uploader': 'Philipp Hagemeister',
  328. 'uploader_id': 'phihag',
  329. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
  330. 'upload_date': '20121002',
  331. 'license': 'Standard YouTube License',
  332. 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
  333. 'categories': ['Science & Technology'],
  334. 'tags': ['youtube-dl'],
  335. 'like_count': int,
  336. 'dislike_count': int,
  337. 'start_time': 1,
  338. 'end_time': 9,
  339. }
  340. },
  341. {
  342. 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
  343. 'note': 'Test generic use_cipher_signature video (#897)',
  344. 'info_dict': {
  345. 'id': 'UxxajLWwzqY',
  346. 'ext': 'mp4',
  347. 'upload_date': '20120506',
  348. 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
  349. 'alt_title': 'I Love It (feat. Charli XCX)',
  350. 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
  351. 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
  352. 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
  353. 'iconic ep', 'iconic', 'love', 'it'],
  354. 'uploader': 'Icona Pop',
  355. 'uploader_id': 'IconaPop',
  356. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop',
  357. 'license': 'Standard YouTube License',
  358. 'creator': 'Icona Pop',
  359. }
  360. },
  361. {
  362. 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
  363. 'note': 'Test VEVO video with age protection (#956)',
  364. 'info_dict': {
  365. 'id': '07FYdnEawAQ',
  366. 'ext': 'mp4',
  367. 'upload_date': '20130703',
  368. 'title': 'Justin Timberlake - Tunnel Vision (Explicit)',
  369. 'alt_title': 'Tunnel Vision',
  370. 'description': 'md5:64249768eec3bc4276236606ea996373',
  371. 'uploader': 'justintimberlakeVEVO',
  372. 'uploader_id': 'justintimberlakeVEVO',
  373. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
  374. 'license': 'Standard YouTube License',
  375. 'creator': 'Justin Timberlake',
  376. 'age_limit': 18,
  377. }
  378. },
  379. {
  380. 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
  381. 'note': 'Embed-only video (#1746)',
  382. 'info_dict': {
  383. 'id': 'yZIXLfi8CZQ',
  384. 'ext': 'mp4',
  385. 'upload_date': '20120608',
  386. 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
  387. 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
  388. 'uploader': 'SET India',
  389. 'uploader_id': 'setindia',
  390. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia',
  391. 'license': 'Standard YouTube License',
  392. 'age_limit': 18,
  393. }
  394. },
  395. {
  396. 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
  397. 'note': 'Use the first video ID in the URL',
  398. 'info_dict': {
  399. 'id': 'BaW_jenozKc',
  400. 'ext': 'mp4',
  401. 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
  402. 'uploader': 'Philipp Hagemeister',
  403. 'uploader_id': 'phihag',
  404. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag',
  405. 'upload_date': '20121002',
  406. 'license': 'Standard YouTube License',
  407. 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
  408. 'categories': ['Science & Technology'],
  409. 'tags': ['youtube-dl'],
  410. 'like_count': int,
  411. 'dislike_count': int,
  412. },
  413. 'params': {
  414. 'skip_download': True,
  415. },
  416. },
  417. {
  418. 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
  419. 'note': '256k DASH audio (format 141) via DASH manifest',
  420. 'info_dict': {
  421. 'id': 'a9LDPn-MO4I',
  422. 'ext': 'm4a',
  423. 'upload_date': '20121002',
  424. 'uploader_id': '8KVIDEO',
  425. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
  426. 'description': '',
  427. 'uploader': '8KVIDEO',
  428. 'license': 'Standard YouTube License',
  429. 'title': 'UHDTV TEST 8K VIDEO.mp4'
  430. },
  431. 'params': {
  432. 'youtube_include_dash_manifest': True,
  433. 'format': '141',
  434. },
  435. 'skip': 'format 141 not served anymore',
  436. },
  437. # DASH manifest with encrypted signature
  438. {
  439. 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
  440. 'info_dict': {
  441. 'id': 'IB3lcPjvWLA',
  442. 'ext': 'm4a',
  443. 'title': 'Afrojack, Spree Wilson - The Spark ft. Spree Wilson',
  444. 'description': 'md5:12e7067fa6735a77bdcbb58cb1187d2d',
  445. 'uploader': 'AfrojackVEVO',
  446. 'uploader_id': 'AfrojackVEVO',
  447. 'upload_date': '20131011',
  448. 'license': 'Standard YouTube License',
  449. },
  450. 'params': {
  451. 'youtube_include_dash_manifest': True,
  452. 'format': '141/bestaudio[ext=m4a]',
  453. },
  454. },
  455. # JS player signature function name containing $
  456. {
  457. 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
  458. 'info_dict': {
  459. 'id': 'nfWlot6h_JM',
  460. 'ext': 'm4a',
  461. 'title': 'Taylor Swift - Shake It Off',
  462. 'alt_title': 'Shake It Off',
  463. 'description': 'md5:95f66187cd7c8b2c13eb78e1223b63c3',
  464. 'uploader': 'TaylorSwiftVEVO',
  465. 'uploader_id': 'TaylorSwiftVEVO',
  466. 'upload_date': '20140818',
  467. 'license': 'Standard YouTube License',
  468. 'creator': 'Taylor Swift',
  469. },
  470. 'params': {
  471. 'youtube_include_dash_manifest': True,
  472. 'format': '141/bestaudio[ext=m4a]',
  473. },
  474. },
  475. # Controversy video
  476. {
  477. 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
  478. 'info_dict': {
  479. 'id': 'T4XJQO3qol8',
  480. 'ext': 'mp4',
  481. 'upload_date': '20100909',
  482. 'uploader': 'The Amazing Atheist',
  483. 'uploader_id': 'TheAmazingAtheist',
  484. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
  485. 'license': 'Standard YouTube License',
  486. 'title': 'Burning Everyone\'s Koran',
  487. 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
  488. }
  489. },
  490. # Normal age-gate video (No vevo, embed allowed)
  491. {
  492. 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
  493. 'info_dict': {
  494. 'id': 'HtVdAasjOgU',
  495. 'ext': 'mp4',
  496. 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
  497. 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
  498. 'uploader': 'The Witcher',
  499. 'uploader_id': 'WitcherGame',
  500. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
  501. 'upload_date': '20140605',
  502. 'license': 'Standard YouTube License',
  503. 'age_limit': 18,
  504. },
  505. },
  506. # Age-gate video with encrypted signature
  507. {
  508. 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
  509. 'info_dict': {
  510. 'id': '6kLq3WMV1nU',
  511. 'ext': 'mp4',
  512. 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
  513. 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
  514. 'uploader': 'LloydVEVO',
  515. 'uploader_id': 'LloydVEVO',
  516. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
  517. 'upload_date': '20110629',
  518. 'license': 'Standard YouTube License',
  519. 'age_limit': 18,
  520. },
  521. },
  522. # video_info is None (https://github.com/rg3/youtube-dl/issues/4421)
  523. {
  524. 'url': '__2ABJjxzNo',
  525. 'info_dict': {
  526. 'id': '__2ABJjxzNo',
  527. 'ext': 'mp4',
  528. 'upload_date': '20100430',
  529. 'uploader_id': 'deadmau5',
  530. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5',
  531. 'creator': 'deadmau5',
  532. 'description': 'md5:12c56784b8032162bb936a5f76d55360',
  533. 'uploader': 'deadmau5',
  534. 'license': 'Standard YouTube License',
  535. 'title': 'Deadmau5 - Some Chords (HD)',
  536. 'alt_title': 'Some Chords',
  537. },
  538. 'expected_warnings': [
  539. 'DASH manifest missing',
  540. ]
  541. },
  542. # Olympics (https://github.com/rg3/youtube-dl/issues/4431)
  543. {
  544. 'url': 'lqQg6PlCWgI',
  545. 'info_dict': {
  546. 'id': 'lqQg6PlCWgI',
  547. 'ext': 'mp4',
  548. 'upload_date': '20150827',
  549. 'uploader_id': 'olympic',
  550. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
  551. 'license': 'Standard YouTube License',
  552. 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
  553. 'uploader': 'Olympic',
  554. 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
  555. },
  556. 'params': {
  557. 'skip_download': 'requires avconv',
  558. }
  559. },
  560. # Non-square pixels
  561. {
  562. 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
  563. 'info_dict': {
  564. 'id': '_b-2C3KPAM0',
  565. 'ext': 'mp4',
  566. 'stretched_ratio': 16 / 9.,
  567. 'upload_date': '20110310',
  568. 'uploader_id': 'AllenMeow',
  569. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
  570. 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
  571. 'uploader': '孫艾倫',
  572. 'license': 'Standard YouTube License',
  573. 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
  574. },
  575. },
  576. # url_encoded_fmt_stream_map is empty string
  577. {
  578. 'url': 'qEJwOuvDf7I',
  579. 'info_dict': {
  580. 'id': 'qEJwOuvDf7I',
  581. 'ext': 'webm',
  582. 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
  583. 'description': '',
  584. 'upload_date': '20150404',
  585. 'uploader_id': 'spbelect',
  586. 'uploader': 'Наблюдатели Петербурга',
  587. },
  588. 'params': {
  589. 'skip_download': 'requires avconv',
  590. },
  591. 'skip': 'This live event has ended.',
  592. },
  593. # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
  594. {
  595. 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
  596. 'info_dict': {
  597. 'id': 'FIl7x6_3R5Y',
  598. 'ext': 'mp4',
  599. 'title': 'md5:7b81415841e02ecd4313668cde88737a',
  600. 'description': 'md5:116377fd2963b81ec4ce64b542173306',
  601. 'upload_date': '20150625',
  602. 'uploader_id': 'dorappi2000',
  603. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
  604. 'uploader': 'dorappi2000',
  605. 'license': 'Standard YouTube License',
  606. 'formats': 'mincount:32',
  607. },
  608. },
  609. # DASH manifest with segment_list
  610. {
  611. 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
  612. 'md5': '8ce563a1d667b599d21064e982ab9e31',
  613. 'info_dict': {
  614. 'id': 'CsmdDsKjzN8',
  615. 'ext': 'mp4',
  616. 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
  617. 'uploader': 'Airtek',
  618. 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
  619. 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
  620. 'license': 'Standard YouTube License',
  621. 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
  622. },
  623. 'params': {
  624. 'youtube_include_dash_manifest': True,
  625. 'format': '135', # bestvideo
  626. },
  627. 'skip': 'This live event has ended.',
  628. },
  629. {
  630. # Multifeed videos (multiple cameras), URL is for Main Camera
  631. 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
  632. 'info_dict': {
  633. 'id': 'jqWvoWXjCVs',
  634. 'title': 'teamPGP: Rocket League Noob Stream',
  635. 'description': 'md5:dc7872fb300e143831327f1bae3af010',
  636. },
  637. 'playlist': [{
  638. 'info_dict': {
  639. 'id': 'jqWvoWXjCVs',
  640. 'ext': 'mp4',
  641. 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
  642. 'description': 'md5:dc7872fb300e143831327f1bae3af010',
  643. 'upload_date': '20150721',
  644. 'uploader': 'Beer Games Beer',
  645. 'uploader_id': 'beergamesbeer',
  646. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
  647. 'license': 'Standard YouTube License',
  648. },
  649. }, {
  650. 'info_dict': {
  651. 'id': '6h8e8xoXJzg',
  652. 'ext': 'mp4',
  653. 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
  654. 'description': 'md5:dc7872fb300e143831327f1bae3af010',
  655. 'upload_date': '20150721',
  656. 'uploader': 'Beer Games Beer',
  657. 'uploader_id': 'beergamesbeer',
  658. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
  659. 'license': 'Standard YouTube License',
  660. },
  661. }, {
  662. 'info_dict': {
  663. 'id': 'PUOgX5z9xZw',
  664. 'ext': 'mp4',
  665. 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
  666. 'description': 'md5:dc7872fb300e143831327f1bae3af010',
  667. 'upload_date': '20150721',
  668. 'uploader': 'Beer Games Beer',
  669. 'uploader_id': 'beergamesbeer',
  670. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
  671. 'license': 'Standard YouTube License',
  672. },
  673. }, {
  674. 'info_dict': {
  675. 'id': 'teuwxikvS5k',
  676. 'ext': 'mp4',
  677. 'title': 'teamPGP: Rocket League Noob Stream (zim)',
  678. 'description': 'md5:dc7872fb300e143831327f1bae3af010',
  679. 'upload_date': '20150721',
  680. 'uploader': 'Beer Games Beer',
  681. 'uploader_id': 'beergamesbeer',
  682. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
  683. 'license': 'Standard YouTube License',
  684. },
  685. }],
  686. 'params': {
  687. 'skip_download': True,
  688. },
  689. },
  690. {
  691. # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536)
  692. 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
  693. 'info_dict': {
  694. 'id': 'gVfLd0zydlo',
  695. 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
  696. },
  697. 'playlist_count': 2,
  698. 'skip': 'Not multifeed anymore',
  699. },
  700. {
  701. 'url': 'https://vid.plus/FlRa-iH7PGw',
  702. 'only_matching': True,
  703. },
  704. {
  705. 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
  706. 'only_matching': True,
  707. },
  708. {
  709. # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
  710. # Also tests cut-off URL expansion in video description (see
  711. # https://github.com/rg3/youtube-dl/issues/1892,
  712. # https://github.com/rg3/youtube-dl/issues/8164)
  713. 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
  714. 'info_dict': {
  715. 'id': 'lsguqyKfVQg',
  716. 'ext': 'mp4',
  717. 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
  718. 'alt_title': 'Dark Walk',
  719. 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
  720. 'upload_date': '20151119',
  721. 'uploader_id': 'IronSoulElf',
  722. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
  723. 'uploader': 'IronSoulElf',
  724. 'license': 'Standard YouTube License',
  725. 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan',
  726. },
  727. 'params': {
  728. 'skip_download': True,
  729. },
  730. },
  731. {
  732. # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468)
  733. 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
  734. 'only_matching': True,
  735. },
  736. {
  737. # Video with yt:stretch=17:0
  738. 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
  739. 'info_dict': {
  740. 'id': 'Q39EVAstoRM',
  741. 'ext': 'mp4',
  742. 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
  743. 'description': 'md5:ee18a25c350637c8faff806845bddee9',
  744. 'upload_date': '20151107',
  745. 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
  746. 'uploader': 'CH GAMER DROID',
  747. },
  748. 'params': {
  749. 'skip_download': True,
  750. },
  751. 'skip': 'This video does not exist.',
  752. },
  753. {
  754. # Video licensed under Creative Commons
  755. 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
  756. 'info_dict': {
  757. 'id': 'M4gD1WSo5mA',
  758. 'ext': 'mp4',
  759. 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
  760. 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
  761. 'upload_date': '20150127',
  762. 'uploader_id': 'BerkmanCenter',
  763. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
  764. 'uploader': 'BerkmanCenter',
  765. 'license': 'Creative Commons Attribution license (reuse allowed)',
  766. },
  767. 'params': {
  768. 'skip_download': True,
  769. },
  770. },
  771. {
  772. # Channel-like uploader_url
  773. 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
  774. 'info_dict': {
  775. 'id': 'eQcmzGIKrzg',
  776. 'ext': 'mp4',
  777. 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
  778. 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
  779. 'upload_date': '20151119',
  780. 'uploader': 'Bernie 2016',
  781. 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
  782. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
  783. 'license': 'Creative Commons Attribution license (reuse allowed)',
  784. },
  785. 'params': {
  786. 'skip_download': True,
  787. },
  788. },
  789. {
  790. 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
  791. 'only_matching': True,
  792. },
  793. {
  794. # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059)
  795. 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
  796. 'only_matching': True,
  797. },
  798. {
  799. # Rental video preview
  800. 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
  801. 'info_dict': {
  802. 'id': 'uGpuVWrhIzE',
  803. 'ext': 'mp4',
  804. 'title': 'Piku - Trailer',
  805. 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
  806. 'upload_date': '20150811',
  807. 'uploader': 'FlixMatrix',
  808. 'uploader_id': 'FlixMatrixKaravan',
  809. 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
  810. 'license': 'Standard YouTube License',
  811. },
  812. 'params': {
  813. 'skip_download': True,
  814. },
  815. }
  816. ]
  817. def __init__(self, *args, **kwargs):
  818. super(YoutubeIE, self).__init__(*args, **kwargs)
  819. self._player_cache = {}
  820. def report_video_info_webpage_download(self, video_id):
  821. """Report attempt to download video info webpage."""
  822. self.to_screen('%s: Downloading video info webpage' % video_id)
  823. def report_information_extraction(self, video_id):
  824. """Report attempt to extract video information."""
  825. self.to_screen('%s: Extracting video information' % video_id)
  826. def report_unavailable_format(self, video_id, format):
  827. """Report extracted video URL."""
  828. self.to_screen('%s: Format %s not available' % (video_id, format))
  829. def report_rtmp_download(self):
  830. """Indicate the download will use the RTMP protocol."""
  831. self.to_screen('RTMP download detected')
  832. def _signature_cache_id(self, example_sig):
  833. """ Return a string representation of a signature """
  834. return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
  835. def _extract_signature_function(self, video_id, player_url, example_sig):
  836. id_m = re.match(
  837. r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$',
  838. player_url)
  839. if not id_m:
  840. raise ExtractorError('Cannot identify player %r' % player_url)
  841. player_type = id_m.group('ext')
  842. player_id = id_m.group('id')
  843. # Read from filesystem cache
  844. func_id = '%s_%s_%s' % (
  845. player_type, player_id, self._signature_cache_id(example_sig))
  846. assert os.path.basename(func_id) == func_id
  847. cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
  848. if cache_spec is not None:
  849. return lambda s: ''.join(s[i] for i in cache_spec)
  850. download_note = (
  851. 'Downloading player %s' % player_url
  852. if self._downloader.params.get('verbose') else
  853. 'Downloading %s player %s' % (player_type, player_id)
  854. )
  855. if player_type == 'js':
  856. code = self._download_webpage(
  857. player_url, video_id,
  858. note=download_note,
  859. errnote='Download of %s failed' % player_url)
  860. res = self._parse_sig_js(code)
  861. elif player_type == 'swf':
  862. urlh = self._request_webpage(
  863. player_url, video_id,
  864. note=download_note,
  865. errnote='Download of %s failed' % player_url)
  866. code = urlh.read()
  867. res = self._parse_sig_swf(code)
  868. else:
  869. assert False, 'Invalid player type %r' % player_type
  870. test_string = ''.join(map(compat_chr, range(len(example_sig))))
  871. cache_res = res(test_string)
  872. cache_spec = [ord(c) for c in cache_res]
  873. self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
  874. return res
  875. def _print_sig_code(self, func, example_sig):
  876. def gen_sig_code(idxs):
  877. def _genslice(start, end, step):
  878. starts = '' if start == 0 else str(start)
  879. ends = (':%d' % (end + step)) if end + step >= 0 else ':'
  880. steps = '' if step == 1 else (':%d' % step)
  881. return 's[%s%s%s]' % (starts, ends, steps)
  882. step = None
  883. # Quelch pyflakes warnings - start will be set when step is set
  884. start = '(Never used)'
  885. for i, prev in zip(idxs[1:], idxs[:-1]):
  886. if step is not None:
  887. if i - prev == step:
  888. continue
  889. yield _genslice(start, prev, step)
  890. step = None
  891. continue
  892. if i - prev in [-1, 1]:
  893. step = i - prev
  894. start = prev
  895. continue
  896. else:
  897. yield 's[%d]' % prev
  898. if step is None:
  899. yield 's[%d]' % i
  900. else:
  901. yield _genslice(start, i, step)
  902. test_string = ''.join(map(compat_chr, range(len(example_sig))))
  903. cache_res = func(test_string)
  904. cache_spec = [ord(c) for c in cache_res]
  905. expr_code = ' + '.join(gen_sig_code(cache_spec))
  906. signature_id_tuple = '(%s)' % (
  907. ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
  908. code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
  909. ' return %s\n') % (signature_id_tuple, expr_code)
  910. self.to_screen('Extracted signature function:\n' + code)
  911. def _parse_sig_js(self, jscode):
  912. funcname = self._search_regex(
  913. r'\.sig\|\|([a-zA-Z0-9$]+)\(', jscode,
  914. 'Initial JS player signature function name')
  915. jsi = JSInterpreter(jscode)
  916. initial_function = jsi.extract_function(funcname)
  917. return lambda s: initial_function([s])
  918. def _parse_sig_swf(self, file_contents):
  919. swfi = SWFInterpreter(file_contents)
  920. TARGET_CLASSNAME = 'SignatureDecipher'
  921. searched_class = swfi.extract_class(TARGET_CLASSNAME)
  922. initial_function = swfi.extract_function(searched_class, 'decipher')
  923. return lambda s: initial_function([s])
  924. def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
  925. """Turn the encrypted s field into a working signature"""
  926. if player_url is None:
  927. raise ExtractorError('Cannot decrypt signature without player_url')
  928. if player_url.startswith('//'):
  929. player_url = 'https:' + player_url
  930. try:
  931. player_id = (player_url, self._signature_cache_id(s))
  932. if player_id not in self._player_cache:
  933. func = self._extract_signature_function(
  934. video_id, player_url, s
  935. )
  936. self._player_cache[player_id] = func
  937. func = self._player_cache[player_id]
  938. if self._downloader.params.get('youtube_print_sig_code'):
  939. self._print_sig_code(func, s)
  940. return func(s)
  941. except Exception as e:
  942. tb = traceback.format_exc()
  943. raise ExtractorError(
  944. 'Signature extraction failed: ' + tb, cause=e)
  945. def _get_subtitles(self, video_id, webpage):
  946. try:
  947. subs_doc = self._download_xml(
  948. 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
  949. video_id, note=False)
  950. except ExtractorError as err:
  951. self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
  952. return {}
  953. sub_lang_list = {}
  954. for track in subs_doc.findall('track'):
  955. lang = track.attrib['lang_code']
  956. if lang in sub_lang_list:
  957. continue
  958. sub_formats = []