PageRenderTime 1652ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/yt_dlp/extractor/dailymotion.py

https://gitlab.com/vitalii.dr/yt-dlp
Python | 389 lines | 378 code | 10 blank | 1 comment | 17 complexity | 16b1aae33b7731466ac5f292e0b512d0 MD5 | raw file
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import functools
  4. import json
  5. import re
  6. from .common import InfoExtractor
  7. from ..compat import compat_HTTPError
  8. from ..utils import (
  9. age_restricted,
  10. clean_html,
  11. ExtractorError,
  12. int_or_none,
  13. OnDemandPagedList,
  14. try_get,
  15. unescapeHTML,
  16. urlencode_postdata,
  17. )
  18. class DailymotionBaseInfoExtractor(InfoExtractor):
  19. _FAMILY_FILTER = None
  20. _HEADERS = {
  21. 'Content-Type': 'application/json',
  22. 'Origin': 'https://www.dailymotion.com',
  23. }
  24. _NETRC_MACHINE = 'dailymotion'
  25. def _get_dailymotion_cookies(self):
  26. return self._get_cookies('https://www.dailymotion.com/')
  27. @staticmethod
  28. def _get_cookie_value(cookies, name):
  29. cookie = cookies.get(name)
  30. if cookie:
  31. return cookie.value
  32. def _set_dailymotion_cookie(self, name, value):
  33. self._set_cookie('www.dailymotion.com', name, value)
  34. def _real_initialize(self):
  35. cookies = self._get_dailymotion_cookies()
  36. ff = self._get_cookie_value(cookies, 'ff')
  37. self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self.get_param('age_limit'))
  38. self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
  39. def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
  40. if not self._HEADERS.get('Authorization'):
  41. cookies = self._get_dailymotion_cookies()
  42. token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
  43. if not token:
  44. data = {
  45. 'client_id': 'f1a362d288c1b98099c7',
  46. 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
  47. }
  48. username, password = self._get_login_info()
  49. if username:
  50. data.update({
  51. 'grant_type': 'password',
  52. 'password': password,
  53. 'username': username,
  54. })
  55. else:
  56. data['grant_type'] = 'client_credentials'
  57. try:
  58. token = self._download_json(
  59. 'https://graphql.api.dailymotion.com/oauth/token',
  60. None, 'Downloading Access Token',
  61. data=urlencode_postdata(data))['access_token']
  62. except ExtractorError as e:
  63. if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  64. raise ExtractorError(self._parse_json(
  65. e.cause.read().decode(), xid)['error_description'], expected=True)
  66. raise
  67. self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
  68. self._HEADERS['Authorization'] = 'Bearer ' + token
  69. resp = self._download_json(
  70. 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
  71. 'query': '''{
  72. %s(xid: "%s"%s) {
  73. %s
  74. }
  75. }''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
  76. }).encode(), headers=self._HEADERS)
  77. obj = resp['data'][object_type]
  78. if not obj:
  79. raise ExtractorError(resp['errors'][0]['message'], expected=True)
  80. return obj
  81. class DailymotionIE(DailymotionBaseInfoExtractor):
  82. _VALID_URL = r'''(?ix)
  83. https?://
  84. (?:
  85. (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
  86. (?:www\.)?lequipe\.fr/video
  87. )
  88. /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
  89. '''
  90. IE_NAME = 'dailymotion'
  91. _TESTS = [{
  92. 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
  93. 'md5': '074b95bdee76b9e3654137aee9c79dfe',
  94. 'info_dict': {
  95. 'id': 'x5kesuj',
  96. 'ext': 'mp4',
  97. 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
  98. 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
  99. 'duration': 187,
  100. 'timestamp': 1493651285,
  101. 'upload_date': '20170501',
  102. 'uploader': 'Deadline',
  103. 'uploader_id': 'x1xm8ri',
  104. 'age_limit': 0,
  105. },
  106. }, {
  107. 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
  108. 'md5': '2137c41a8e78554bb09225b8eb322406',
  109. 'info_dict': {
  110. 'id': 'x2iuewm',
  111. 'ext': 'mp4',
  112. 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
  113. 'description': 'Several come bundled with the Steam Controller.',
  114. 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
  115. 'duration': 74,
  116. 'timestamp': 1425657362,
  117. 'upload_date': '20150306',
  118. 'uploader': 'IGN',
  119. 'uploader_id': 'xijv66',
  120. 'age_limit': 0,
  121. 'view_count': int,
  122. },
  123. 'skip': 'video gone',
  124. }, {
  125. # Vevo video
  126. 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
  127. 'info_dict': {
  128. 'title': 'Roar (Official)',
  129. 'id': 'USUV71301934',
  130. 'ext': 'mp4',
  131. 'uploader': 'Katy Perry',
  132. 'upload_date': '20130905',
  133. },
  134. 'params': {
  135. 'skip_download': True,
  136. },
  137. 'skip': 'VEVO is only available in some countries',
  138. }, {
  139. # age-restricted video
  140. 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
  141. 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
  142. 'info_dict': {
  143. 'id': 'xyh2zz',
  144. 'ext': 'mp4',
  145. 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
  146. 'uploader': 'HotWaves1012',
  147. 'age_limit': 18,
  148. },
  149. 'skip': 'video gone',
  150. }, {
  151. # geo-restricted, player v5
  152. 'url': 'http://www.dailymotion.com/video/xhza0o',
  153. 'only_matching': True,
  154. }, {
  155. # with subtitles
  156. 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
  157. 'only_matching': True,
  158. }, {
  159. 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
  160. 'only_matching': True,
  161. }, {
  162. 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
  163. 'only_matching': True,
  164. }, {
  165. 'url': 'https://www.lequipe.fr/video/x791mem',
  166. 'only_matching': True,
  167. }, {
  168. 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
  169. 'only_matching': True,
  170. }, {
  171. 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
  172. 'only_matching': True,
  173. }]
  174. _GEO_BYPASS = False
  175. _COMMON_MEDIA_FIELDS = '''description
  176. geoblockedCountries {
  177. allowed
  178. }
  179. xid'''
  180. @staticmethod
  181. def _extract_urls(webpage):
  182. urls = []
  183. # Look for embedded Dailymotion player
  184. # https://developer.dailymotion.com/player#player-parameters
  185. for mobj in re.finditer(
  186. r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
  187. urls.append(unescapeHTML(mobj.group('url')))
  188. for mobj in re.finditer(
  189. r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
  190. urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
  191. return urls
  192. def _real_extract(self, url):
  193. video_id, playlist_id = self._match_valid_url(url).groups()
  194. if playlist_id:
  195. if self._yes_playlist(playlist_id, video_id):
  196. return self.url_result(
  197. 'http://www.dailymotion.com/playlist/' + playlist_id,
  198. 'DailymotionPlaylist', playlist_id)
  199. password = self.get_param('videopassword')
  200. media = self._call_api(
  201. 'media', video_id, '''... on Video {
  202. %s
  203. stats {
  204. likes {
  205. total
  206. }
  207. views {
  208. total
  209. }
  210. }
  211. }
  212. ... on Live {
  213. %s
  214. audienceCount
  215. isOnAir
  216. }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
  217. 'password: "%s"' % self.get_param('videopassword') if password else None)
  218. xid = media['xid']
  219. metadata = self._download_json(
  220. 'https://www.dailymotion.com/player/metadata/video/' + xid,
  221. xid, 'Downloading metadata JSON',
  222. query={'app': 'com.dailymotion.neon'})
  223. error = metadata.get('error')
  224. if error:
  225. title = error.get('title') or error['raw_message']
  226. # See https://developer.dailymotion.com/api#access-error
  227. if error.get('code') == 'DM007':
  228. allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
  229. self.raise_geo_restricted(msg=title, countries=allowed_countries)
  230. raise ExtractorError(
  231. '%s said: %s' % (self.IE_NAME, title), expected=True)
  232. title = metadata['title']
  233. is_live = media.get('isOnAir')
  234. formats = []
  235. for quality, media_list in metadata['qualities'].items():
  236. for m in media_list:
  237. media_url = m.get('url')
  238. media_type = m.get('type')
  239. if not media_url or media_type == 'application/vnd.lumberjack.manifest':
  240. continue
  241. if media_type == 'application/x-mpegURL':
  242. formats.extend(self._extract_m3u8_formats(
  243. media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False))
  244. else:
  245. f = {
  246. 'url': media_url,
  247. 'format_id': 'http-' + quality,
  248. }
  249. m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
  250. if m:
  251. width, height, fps = map(int_or_none, m.groups())
  252. f.update({
  253. 'fps': fps,
  254. 'height': height,
  255. 'width': width,
  256. })
  257. formats.append(f)
  258. for f in formats:
  259. f['url'] = f['url'].split('#')[0]
  260. if not f.get('fps') and f['format_id'].endswith('@60'):
  261. f['fps'] = 60
  262. self._sort_formats(formats)
  263. subtitles = {}
  264. subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
  265. for subtitle_lang, subtitle in subtitles_data.items():
  266. subtitles[subtitle_lang] = [{
  267. 'url': subtitle_url,
  268. } for subtitle_url in subtitle.get('urls', [])]
  269. thumbnails = []
  270. for height, poster_url in metadata.get('posters', {}).items():
  271. thumbnails.append({
  272. 'height': int_or_none(height),
  273. 'id': height,
  274. 'url': poster_url,
  275. })
  276. owner = metadata.get('owner') or {}
  277. stats = media.get('stats') or {}
  278. get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
  279. return {
  280. 'id': video_id,
  281. 'title': title,
  282. 'description': clean_html(media.get('description')),
  283. 'thumbnails': thumbnails,
  284. 'duration': int_or_none(metadata.get('duration')) or None,
  285. 'timestamp': int_or_none(metadata.get('created_time')),
  286. 'uploader': owner.get('screenname'),
  287. 'uploader_id': owner.get('id') or metadata.get('screenname'),
  288. 'age_limit': 18 if metadata.get('explicit') else 0,
  289. 'tags': metadata.get('tags'),
  290. 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
  291. 'like_count': get_count('like'),
  292. 'formats': formats,
  293. 'subtitles': subtitles,
  294. 'is_live': is_live,
  295. }
  296. class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
  297. _PAGE_SIZE = 100
  298. def _fetch_page(self, playlist_id, page):
  299. page += 1
  300. videos = self._call_api(
  301. self._OBJECT_TYPE, playlist_id,
  302. '''videos(allowExplicit: %s, first: %d, page: %d) {
  303. edges {
  304. node {
  305. xid
  306. url
  307. }
  308. }
  309. }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
  310. 'Downloading page %d' % page)['videos']
  311. for edge in videos['edges']:
  312. node = edge['node']
  313. yield self.url_result(
  314. node['url'], DailymotionIE.ie_key(), node['xid'])
  315. def _real_extract(self, url):
  316. playlist_id = self._match_id(url)
  317. entries = OnDemandPagedList(functools.partial(
  318. self._fetch_page, playlist_id), self._PAGE_SIZE)
  319. return self.playlist_result(
  320. entries, playlist_id)
  321. class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
  322. IE_NAME = 'dailymotion:playlist'
  323. _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
  324. _TESTS = [{
  325. 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
  326. 'info_dict': {
  327. 'id': 'xv4bw',
  328. },
  329. 'playlist_mincount': 20,
  330. }]
  331. _OBJECT_TYPE = 'collection'
  332. class DailymotionUserIE(DailymotionPlaylistBaseIE):
  333. IE_NAME = 'dailymotion:user'
  334. _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
  335. _TESTS = [{
  336. 'url': 'https://www.dailymotion.com/user/nqtv',
  337. 'info_dict': {
  338. 'id': 'nqtv',
  339. },
  340. 'playlist_mincount': 152,
  341. }, {
  342. 'url': 'http://www.dailymotion.com/user/UnderProject',
  343. 'info_dict': {
  344. 'id': 'UnderProject',
  345. },
  346. 'playlist_mincount': 1000,
  347. 'skip': 'Takes too long time',
  348. }, {
  349. 'url': 'https://www.dailymotion.com/user/nqtv',
  350. 'info_dict': {
  351. 'id': 'nqtv',
  352. },
  353. 'playlist_mincount': 148,
  354. 'params': {
  355. 'age_limit': 0,
  356. },
  357. }]
  358. _OBJECT_TYPE = 'channel'