PageRenderTime 60ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/mediacore/lib/filetypes.py

https://github.com/karatatar/mediacore
Python | 391 lines | 288 code | 22 blank | 81 comment | 15 complexity | cf6a970252c97ba4c062b4413654f2f4 MD5 | raw file
  1. # This file is a part of MediaCore, Copyright 2009 Simple Station Inc.
  2. #
  3. # MediaCore is free software: you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation, either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # MediaCore is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. import re
  16. from pylons import config, request
  17. __all__ = [
  18. 'accepted_extensions',
  19. 'guess_container_format'
  20. 'guess_media_type',
  21. 'guess_mimetype',
  22. 'parse_embed_url',
  23. 'pick_media_file_player',
  24. ]
  25. AUDIO = 'audio'
  26. VIDEO = 'video'
  27. CAPTIONS = 'captions'
  28. # Mimetypes for all file extensions accepted by the front and backend uploaders
  29. #
  30. # OTHER USES:
  31. # 1) To determine the mimetype to serve, based on a MediaFile's container type.
  32. # 2) In conjunction with the container_lookup dict below to determine the
  33. # container type for a MediaFile, based on the uploaded file's extension.
  34. #
  35. # XXX: The keys in this dict are sometimes treated as names for container types
  36. # and sometimes treated as file extensions. Caveat coder.
  37. # TODO: Replace with a more complete list or (even better) change the logic
  38. # to detect mimetypes from something other than the file extension.
  39. mimetype_lookup = {
  40. 'flac': 'audio/flac',
  41. 'mp3': 'audio/mpeg',
  42. 'mp4': '%s/mp4',
  43. 'm4a': '%s/mp4',
  44. 'm4v': '%s/mp4',
  45. 'ogg': '%s/ogg',
  46. '3gp': '%s/3gpp',
  47. '3g2': '%s/3gpp',
  48. 'divx': 'video/mpeg',
  49. 'dv': 'video/x-dv',
  50. 'dvx': 'video/mpeg',
  51. 'flv': 'video/x-flv', # made up, it's what everyone uses anyway.
  52. 'mov': 'video/quicktime',
  53. 'mpeg': '%s/mpeg',
  54. 'mpg': '%s/mpeg',
  55. 'qt': 'video/quicktime',
  56. 'vob': '%s/x-vob', # multiplexed container format
  57. 'wmv': 'video/x-ms-wmv',
  58. 'xml': 'application/ttml+xml',
  59. 'srt': 'text/plain',
  60. }
  61. # Default container format (and also file extension) for each mimetype we allow
  62. # users to upload.
  63. container_lookup = {
  64. 'audio/flac': 'flac',
  65. 'audio/mp4': 'mp4',
  66. 'audio/mpeg': 'mp3',
  67. 'audio/ogg': 'ogg',
  68. 'video/3gpp': '3gp',
  69. 'video/mp4': 'mp4',
  70. 'video/mpeg': 'mpg',
  71. 'video/ogg': 'ogg',
  72. 'video/quicktime': 'mov',
  73. 'video/x-dv': 'dv',
  74. 'video/x-flv': 'flv',
  75. 'video/x-ms-wmv': 'wmv',
  76. 'video/x-vob': 'vob',
  77. 'application/ttml+xml': 'xml',
  78. 'text/plain': 'srt',
  79. }
  80. # When media_obj.container doesn't match a key in the mimetype_lookup dict...
  81. default_media_mimetype = 'application/octet-stream'
  82. # File extension map to audio, video or captions
  83. guess_media_type_map = {
  84. 'mp3': AUDIO,
  85. 'm4a': AUDIO,
  86. 'flac': AUDIO,
  87. 'mp4': VIDEO,
  88. 'm4v': VIDEO,
  89. 'ogg': VIDEO,
  90. '3gp': VIDEO,
  91. '3g2': VIDEO,
  92. 'divx': VIDEO,
  93. 'dv': VIDEO,
  94. 'dvx': VIDEO,
  95. 'flv': VIDEO,
  96. 'mov': VIDEO,
  97. 'mpeg': VIDEO,
  98. 'mpg': VIDEO,
  99. 'qt': VIDEO,
  100. 'vob': VIDEO,
  101. 'wmv': VIDEO,
  102. 'xml': CAPTIONS,
  103. 'srt': CAPTIONS,
  104. }
  105. # Patterns for embedding third party video which extract the video ID
  106. external_embedded_containers = {
  107. 'youtube': {
  108. 'pattern': re.compile('^(http(s?)://)?(\w+.)?youtube.com/watch\?(.*&)?v=(?P<id>[^&#]+)'),
  109. 'play': 'http://youtube.com/v/%s',
  110. 'link': 'http://youtube.com/watch?v=%s',
  111. 'type': VIDEO,
  112. },
  113. 'google': {
  114. 'pattern': re.compile('^(http(s?)://)?video.google.com/videoplay\?(.*&)?docid=(?P<id>-\d+)'),
  115. 'play': 'http://video.google.com/googleplayer.swf?docid=%s&hl=en&fs=true',
  116. 'link': 'http://video.google.com/videoplay?docid=%s',
  117. 'type': VIDEO,
  118. },
  119. 'vimeo': {
  120. 'pattern': re.compile('^(http(s?)://)?(www.)?vimeo.com/(?P<id>\d+)'),
  121. 'play': 'http://vimeo.com/moogaloop.swf?clip_id=%s&server=vimeo.com&show_title=1&show_byline=1&show_portrait=0&color=&fullscreen=1',
  122. 'link': 'http://vimeo.com/%s',
  123. 'type': VIDEO,
  124. },
  125. }
  126. # The list of file extensions that flash should recognize and be able to play.
  127. flash_supported_containers = ['mp3', 'mp4', 'm4v', 'm4a', 'flv', 'flac']
  128. flash_supported_browsers = ['firefox', 'opera', 'chrome', 'safari', 'unknown']
  129. # Container and Codec support for HTML5 tag in various browsers.
  130. # The following list taken from http://diveintohtml5.org/video.html#what-works
  131. # Safari also supports all default quicktime formats. But we'll keep it simple.
  132. # h264 = h264 all profiles
  133. # h264b = h264 baseline profile
  134. # aac = aac all profiles
  135. # aacl = aac low complexity profile
  136. # FIXME: While included for future usefuleness, the codecs in the list below
  137. # are ignored by the actual logic in pick_media_file_player() below.
  138. # If the media file in question has a container type that /might/ hold
  139. # a supported codec for the platform, we assume it will work.
  140. # Furthermore, the list of codec support is very incomplete.
  141. # XXX: not all container types here will be be considered playable by the
  142. # system, as the associated media files will not be marked 'encoded' as
  143. # per the playable_containers dict.
  144. native_supported_containers_codecs = {
  145. 'firefox': [
  146. (3.5, 'ogg', ['theora', 'vorbis']),
  147. ],
  148. 'opera': [
  149. (10.5, 'ogg', ['theora', 'vorbis']),
  150. ],
  151. 'chrome': [
  152. (3.0, 'ogg', ['theora', 'vorbis']),
  153. (3.0, 'mp4', ['h264', 'aac']),
  154. (3.0, 'm4v', ['h264', 'aac']),
  155. (3.0, 'm4a', []),
  156. (3.0, 'mp3', []),
  157. ],
  158. 'safari': [
  159. (522, 'mp4', ['h264', 'aac']), # revision 522 was introduced in version 3.0
  160. (522, 'm4v', ['h264', 'aac']),
  161. (522, 'm4a', []),
  162. (522, 'mp3', []),
  163. ],
  164. 'itunes': [
  165. (0, 'mp4', ['h264', 'aac']),
  166. (0, 'm4v', ['h264', 'aac']),
  167. (0, 'm4a', []),
  168. (0, 'mp3', []),
  169. ],
  170. 'iphone': [
  171. (0, 'mp4', ['h264b', 'aacl']),
  172. (0, 'm4v', ['h264b', 'aacl']),
  173. (0, 'm4a', []),
  174. (0, 'mp3', []), # TODO: Test this. We assume it is supported because Safari supports it.
  175. ],
  176. 'android': [
  177. (0, 'mp4', ['h264b', 'aacl']),
  178. (0, 'm4v', ['h264b', 'aacl']),
  179. (0, 'm4a', []),
  180. (0, 'mp3', []), # TODO: Test this. We assume it is supported because Chrome supports it.
  181. ],
  182. 'unknown': [],
  183. }
  184. # This is a wildly incomplete set of regular expressions that parse the
  185. # important numbers from the browser version for determining things like
  186. # HTML5 support.
  187. user_agent_regexes = {
  188. 'chrome': re.compile(r'Chrome.(\d+\.\d+)'), # contains the safari string. check for chrome before safari
  189. 'firefox': re.compile(r'Firefox.(\d+\.\d+)'),
  190. 'opera': re.compile(r'Opera.(\d+\.\d+)'),
  191. 'safari': re.compile(r'Safari.(\d+\.\d+)'),
  192. 'android': re.compile(r'Android.(\d+\.\d+)'),
  193. 'iphone': re.compile(r'iPhone.+Safari/(\d+\.\d+)'),
  194. 'itunes': re.compile(r'iTunes/(\d+\.\d+)'),
  195. }
  196. def accepted_extensions():
  197. """Return the extensions allowed for upload.
  198. :rtype: list
  199. """
  200. e = mimetype_lookup.keys()
  201. e = sorted(e)
  202. return e
  203. def parse_embed_url(url):
  204. """Parse the URL to return relevant info if its a for valid embed.
  205. :param url: A fully qualified URL.
  206. :returns: The container (embed site name), the unique id,
  207. and a type (audio or video).
  208. :rtype: dict or None
  209. """
  210. for container, info in external_embedded_containers.iteritems():
  211. match = info['pattern'].match(url)
  212. if match is not None:
  213. return {
  214. 'container': container,
  215. 'id': match.group('id'),
  216. 'type': info['type'],
  217. }
  218. return None
  219. def parse_user_agent_version(ua=None):
  220. """Return a tuple representing the user agent's browser name and version.
  221. :param ua: An optional User-Agent header to use. Defaults to
  222. that of the current request.
  223. :type ua: str
  224. """
  225. if ua is None:
  226. ua = request.headers.get('User-Agent', '')
  227. for device, pattern in user_agent_regexes.iteritems():
  228. match = pattern.search(ua)
  229. if match is not None:
  230. version = float(match.groups()[0])
  231. return device, version
  232. return 'unknown', 0
  233. def native_supported_types(browser, version=None):
  234. """Return the browser's supported HTML5 video containers and codecs.
  235. The browser can be determined automatically from the user agent. If
  236. no browser and no user agent is specified, the user agent is read
  237. from the request headers. See :func
  238. :param browser: Browser name from :attr:`native_browser_supported_containers`
  239. :type browser: str or None
  240. :param version: Optional version number, used when a browser arg is given.
  241. :type version: float or None
  242. :returns: The containers and codecs supported by the given browser/version.
  243. :rtype: list
  244. """
  245. if browser not in native_supported_containers_codecs:
  246. browser = 'unknown'
  247. scc = native_supported_containers_codecs[browser]
  248. native_options = []
  249. for req_version, containers, codecs in scc:
  250. if version is None or version >= req_version:
  251. native_options.append((containers, codecs))
  252. return native_options
  253. def guess_container_format(extension):
  254. """Return the most likely container format based on the file extension.
  255. This standardizes to an audio/video-agnostic form of the container, if
  256. applicable. For example m4v becomes mp4.
  257. :param extension: the file extension, without a preceding period.
  258. :type extension: string
  259. :rtype: string or None
  260. """
  261. mt = guess_mimetype(extension)
  262. return container_lookup.get(mt, None)
  263. def guess_media_type(extension=None, embed=None, default=VIDEO):
  264. """Return the most likely media type based on the container or embed site.
  265. :param extension: Optional, the file extension without a preceding period.
  266. :param embed: Optional, the third-party site name.
  267. :param default: Default to video if we don't have any other guess.
  268. :returns: 'audio', 'video', 'captions', or None
  269. """
  270. if extension is not None:
  271. return guess_media_type_map.get(extension, default)
  272. if embed is not None:
  273. return external_embedded_containers.get(embed, {}).get('type', default)
  274. return default
  275. def guess_mimetype(container, type_=None, default=None):
  276. """Return the best guess mimetype for the given container.
  277. If the type (audio or video) is not provided, we make our best guess
  278. as to which is will probably be, using :func:`guess_container_type`.
  279. Note that this value is ignored for certain mimetypes: it's useful
  280. only when a container can be both audio and video.
  281. :param container: The file extension
  282. :param type_: 'audio', 'video' or 'captions'
  283. :param default: Default mimetype for when guessing fails
  284. :returns: A mime string or None.
  285. """
  286. if type_ is None:
  287. type_ = guess_media_type(container)
  288. mt = mimetype_lookup.get(container, None)
  289. if mt is None:
  290. return default or default_media_mimetype
  291. try:
  292. return mt % type_
  293. except (ValueError, TypeError):
  294. return mt
  295. def pick_media_file_player(files, browser=None, version=None, user_agent=None):
  296. """Return the best choice of files to play and which player to use.
  297. XXX: This method uses the very unsophisticated technique of assuming
  298. that if the client is capable of playing the container format, then
  299. the client should be able to play the tracks within the container,
  300. regardless of the codecs actually used. As such, admins would be
  301. well advised to use the lowest-common-denominator for their targeted
  302. clients when using files for consumption in an HTML5 player, and
  303. to use the standard codecs when encoding for Flash player use.
  304. :param files: :class:`~mediacore.model.media.MediaFile` instances.
  305. :type files: list
  306. :param browser: Optional browser name to bypass user agents altogether.
  307. See :attr:`native_browser_supported_containers` for possible values.
  308. :type browser: str or None
  309. :param version: Optional version number, used when a browser arg is given.
  310. :type version: float or None
  311. :param user_agent: Optional User-Agent header to use. Defaults to
  312. that of the current request.
  313. :type user_agent: str or None
  314. :returns: A :class:`~mediacore.model.media.MediaFile` and a player name.
  315. :rtype: tuple
  316. """
  317. from mediacore.lib.helpers import fetch_setting
  318. player_type = fetch_setting('player_type')
  319. if browser is None:
  320. browser, version = parse_user_agent_version(user_agent)
  321. support_html5 = player_type in ('best', 'html5')
  322. support_flash = player_type in ('best', 'flash') and \
  323. browser in flash_supported_browsers
  324. # Only proceed if this file is a playable type
  325. files = [file for file in files if file.type in (AUDIO, VIDEO)]
  326. # First, check if it's an embedded video from another site.
  327. if support_flash:
  328. for file in files:
  329. if file.container in external_embedded_containers:
  330. # TODO: Support vimeo and youtube in our jwplayer/etc
  331. return file, 'embed'
  332. # If possible, return an applicable file and html5 player
  333. # Note that this is currently based only on the container type
  334. if support_html5:
  335. for container, codecs in native_supported_types(browser, version):
  336. for file in files:
  337. if file.container == container:
  338. return file, fetch_setting('html5_player')
  339. # If possible, return an applicable file and flash player
  340. if support_flash:
  341. for file in files:
  342. if file.container in flash_supported_containers:
  343. return file, fetch_setting('flash_player')
  344. # No acceptable file/player combination could be found.
  345. return None, None