PageRenderTime 31ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/xbmc/cores/dvdplayer/DVDCodecs/Video/DXVA.cpp

http://github.com/xbmc/xbmc
C++ | 1618 lines | 1290 code | 249 blank | 79 comment | 237 complexity | 3103d0a459322864d14551b3fc768147 MD5 | raw file
Possible License(s): GPL-3.0, CC-BY-SA-3.0, LGPL-2.0, 0BSD, Unlicense, GPL-2.0, AGPL-1.0, BSD-3-Clause, LGPL-2.1, LGPL-3.0
  1. /*
  2. * Copyright (C) 2005-2013 Team XBMC
  3. * http://xbmc.org
  4. *
  5. * This Program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2, or (at your option)
  8. * any later version.
  9. *
  10. * This Program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with XBMC; see the file COPYING. If not, see
  17. * <http://www.gnu.org/licenses/>.
  18. *
  19. */
  20. #ifdef HAS_DX
  21. // setting that here because otherwise SampleFormat is defined to AVSampleFormat
  22. // which we don't use here
  23. #define FF_API_OLD_SAMPLE_FMT 0
  24. #include <windows.h>
  25. #include <d3d9.h>
  26. #include <Initguid.h>
  27. #include <dxva.h>
  28. #include <dxva2api.h>
  29. #include "libavcodec/dxva2.h"
  30. #include "../DVDCodecUtils.h"
  31. #include "DXVA.h"
  32. #include "windowing/WindowingFactory.h"
  33. #include "../../../VideoRenderers/WinRenderer.h"
  34. #include "settings/Settings.h"
  35. #include "settings/MediaSettings.h"
  36. #include "boost/shared_ptr.hpp"
  37. #include "utils/AutoPtrHandle.h"
  38. #include "settings/AdvancedSettings.h"
  39. #include "settings/MediaSettings.h"
  40. #include "cores/VideoRenderers/RenderManager.h"
  41. #include "win32/WIN32Util.h"
  42. #define ALLOW_ADDING_SURFACES 0
  43. using namespace DXVA;
  44. using namespace AUTOPTR;
  45. using namespace std;
  46. typedef HRESULT (__stdcall *DXVA2CreateVideoServicePtr)(IDirect3DDevice9* pDD, REFIID riid, void** ppService);
  47. static DXVA2CreateVideoServicePtr g_DXVA2CreateVideoService;
  48. static bool LoadDXVA()
  49. {
  50. static CCriticalSection g_section;
  51. static HMODULE g_handle;
  52. CSingleLock lock(g_section);
  53. if(g_handle == NULL)
  54. g_handle = LoadLibraryEx("dxva2.dll", NULL, 0);
  55. if(g_handle == NULL)
  56. return false;
  57. g_DXVA2CreateVideoService = (DXVA2CreateVideoServicePtr)GetProcAddress(g_handle, "DXVA2CreateVideoService");
  58. if(g_DXVA2CreateVideoService == NULL)
  59. return false;
  60. return true;
  61. }
  62. static void RelBufferS(AVCodecContext *avctx, AVFrame *pic)
  63. { ((CDecoder*)((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetHardware())->RelBuffer(avctx, pic); }
  64. static int GetBufferS(AVCodecContext *avctx, AVFrame *pic)
  65. { return ((CDecoder*)((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetHardware())->GetBuffer(avctx, pic); }
  66. DEFINE_GUID(DXVADDI_Intel_ModeH264_A, 0x604F8E64,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  67. DEFINE_GUID(DXVADDI_Intel_ModeH264_C, 0x604F8E66,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  68. DEFINE_GUID(DXVADDI_Intel_ModeH264_E, 0x604F8E68,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  69. DEFINE_GUID(DXVADDI_Intel_ModeVC1_E , 0xBCC5DB6D,0xA2B6,0x4AF0,0xAC,0xE4,0xAD,0xB1,0xF7,0x87,0xBC,0x89);
  70. #if _MSC_VER < 1700
  71. DEFINE_GUID(DXVA_ModeMPEG2and1_VLD, 0x86695f12,0x340e,0x4f04,0x9f,0xd3,0x92,0x53,0xdd,0x32,0x74,0x60);
  72. // When exposed by an accelerator, indicates compliance with the August 2010 spec update
  73. DEFINE_GUID(DXVA_ModeVC1_D2010, 0x1b81beA4,0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
  74. #endif
  75. typedef struct {
  76. const char *name;
  77. const GUID *guid;
  78. int codec;
  79. } dxva2_mode_t;
  80. /* XXX Prefered modes must come first */
  81. static const dxva2_mode_t dxva2_modes[] = {
  82. { "MPEG2 VLD", &DXVA2_ModeMPEG2_VLD, AV_CODEC_ID_MPEG2VIDEO },
  83. { "MPEG1/2 VLD", &DXVA_ModeMPEG2and1_VLD, AV_CODEC_ID_MPEG2VIDEO },
  84. { "MPEG2 MoComp", &DXVA2_ModeMPEG2_MoComp, 0 },
  85. { "MPEG2 IDCT", &DXVA2_ModeMPEG2_IDCT, 0 },
  86. // Intel drivers return standard modes in addition to the Intel specific ones. Try the Intel specific first, they work better for Sandy Bridges.
  87. { "Intel H.264 VLD, no FGT", &DXVADDI_Intel_ModeH264_E, AV_CODEC_ID_H264 },
  88. { "Intel H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVADDI_Intel_ModeH264_C, 0 },
  89. { "Intel H.264 motion compensation (MoComp), no FGT", &DXVADDI_Intel_ModeH264_A, 0 },
  90. { "Intel VC-1 VLD", &DXVADDI_Intel_ModeVC1_E, 0 },
  91. { "H.264 variable-length decoder (VLD), FGT", &DXVA2_ModeH264_F, AV_CODEC_ID_H264 },
  92. { "H.264 VLD, no FGT", &DXVA2_ModeH264_E, AV_CODEC_ID_H264 },
  93. { "H.264 IDCT, FGT", &DXVA2_ModeH264_D, 0, },
  94. { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0, },
  95. { "H.264 MoComp, FGT", &DXVA2_ModeH264_B, 0, },
  96. { "H.264 motion compensation (MoComp), no FGT", &DXVA2_ModeH264_A, 0, },
  97. { "Windows Media Video 8 MoComp", &DXVA2_ModeWMV8_B, 0 },
  98. { "Windows Media Video 8 post processing", &DXVA2_ModeWMV8_A, 0 },
  99. { "Windows Media Video 9 IDCT", &DXVA2_ModeWMV9_C, 0 },
  100. { "Windows Media Video 9 MoComp", &DXVA2_ModeWMV9_B, 0 },
  101. { "Windows Media Video 9 post processing", &DXVA2_ModeWMV9_A, 0 },
  102. { "VC-1 VLD", &DXVA2_ModeVC1_D, AV_CODEC_ID_VC1 },
  103. { "VC-1 VLD", &DXVA2_ModeVC1_D, AV_CODEC_ID_WMV3 },
  104. { "VC-1 VLD 2010", &DXVA_ModeVC1_D2010, AV_CODEC_ID_VC1 },
  105. { "VC-1 VLD 2010", &DXVA_ModeVC1_D2010, AV_CODEC_ID_WMV3 },
  106. { "VC-1 IDCT", &DXVA2_ModeVC1_C, 0 },
  107. { "VC-1 MoComp", &DXVA2_ModeVC1_B, 0 },
  108. { "VC-1 post processing", &DXVA2_ModeVC1_A, 0 },
  109. { NULL, NULL, 0 }
  110. };
  111. DEFINE_GUID(DXVA2_VideoProcATIVectorAdaptiveDevice, 0x3C5323C1,0x6fb7,0x44f5,0x90,0x81,0x05,0x6b,0xf2,0xee,0x44,0x9d);
  112. DEFINE_GUID(DXVA2_VideoProcATIMotionAdaptiveDevice, 0x552C0DAD,0xccbc,0x420b,0x83,0xc8,0x74,0x94,0x3c,0xf9,0xf1,0xa6);
  113. DEFINE_GUID(DXVA2_VideoProcATIAdaptiveDevice, 0x6E8329FF,0xb642,0x418b,0xbc,0xf0,0xbc,0xb6,0x59,0x1e,0x25,0x5f);
  114. DEFINE_GUID(DXVA2_VideoProcNVidiaAdaptiveDevice, 0x6CB69578,0x7617,0x4637,0x91,0xE5,0x1C,0x02,0xDB,0x81,0x02,0x85);
  115. DEFINE_GUID(DXVA2_VideoProcIntelEdgeDevice, 0xBF752EF6,0x8CC4,0x457A,0xBE,0x1B,0x08,0xBD,0x1C,0xAE,0xEE,0x9F);
  116. DEFINE_GUID(DXVA2_VideoProcNVidiaUnknownDevice, 0xF9F19DA5,0x3B09,0x4B2F,0x9D,0x89,0xC6,0x47,0x53,0xE3,0xEA,0xAB);
  117. typedef struct {
  118. const char *name;
  119. const GUID *guid;
  120. } dxva2_device_t;
  121. static const dxva2_device_t dxva2_devices[] = {
  122. { "Progressive Device", &DXVA2_VideoProcProgressiveDevice },
  123. { "Bob Device", &DXVA2_VideoProcBobDevice },
  124. { "Vector Adaptative Device", &DXVA2_VideoProcATIVectorAdaptiveDevice },
  125. { "Motion Adaptative Device", &DXVA2_VideoProcATIMotionAdaptiveDevice },
  126. { "Adaptative Device", &DXVA2_VideoProcATIAdaptiveDevice },
  127. { "Spatial-temporal device", &DXVA2_VideoProcNVidiaAdaptiveDevice },
  128. { "Edge directed device", &DXVA2_VideoProcIntelEdgeDevice },
  129. { "Unknown device (nVidia)", &DXVA2_VideoProcNVidiaUnknownDevice },
  130. { NULL, NULL }
  131. };
  132. typedef struct {
  133. const char *name;
  134. unsigned flags;
  135. } dxva2_deinterlacetech_t;
  136. static const dxva2_deinterlacetech_t dxva2_deinterlacetechs[] = {
  137. { "Inverse Telecine", DXVA2_DeinterlaceTech_InverseTelecine },
  138. { "Motion vector steered", DXVA2_DeinterlaceTech_MotionVectorSteered },
  139. { "Pixel adaptive", DXVA2_DeinterlaceTech_PixelAdaptive },
  140. { "Field adaptive", DXVA2_DeinterlaceTech_FieldAdaptive },
  141. { "Edge filtering", DXVA2_DeinterlaceTech_EdgeFiltering },
  142. { "Median filtering", DXVA2_DeinterlaceTech_MedianFiltering },
  143. { "Bob vertical stretch 4-tap", DXVA2_DeinterlaceTech_BOBVerticalStretch4Tap },
  144. { "Bob vertical stretch", DXVA2_DeinterlaceTech_BOBVerticalStretch },
  145. { "Bob line replicate", DXVA2_DeinterlaceTech_BOBLineReplicate },
  146. { "Unknown", DXVA2_DeinterlaceTech_Unknown },
  147. { NULL, 0 }
  148. };
  149. // Prefered targets must be first
  150. static const D3DFORMAT render_targets[] = {
  151. (D3DFORMAT)MAKEFOURCC('N','V','1','2'),
  152. (D3DFORMAT)MAKEFOURCC('Y','V','1','2'),
  153. D3DFMT_UNKNOWN
  154. };
  155. // List of PCI Device ID of ATI cards with UVD or UVD+ decoding block.
  156. static DWORD UVDDeviceID [] = {
  157. 0x95C0, // ATI Radeon HD 3400 Series (and others)
  158. 0x95C5, // ATI Radeon HD 3400 Series (and others)
  159. 0x95C4, // ATI Radeon HD 3400 Series (and others)
  160. 0x94C3, // ATI Radeon HD 3410
  161. 0x9589, // ATI Radeon HD 3600 Series (and others)
  162. 0x9598, // ATI Radeon HD 3600 Series (and others)
  163. 0x9591, // ATI Radeon HD 3600 Series (and others)
  164. 0x9501, // ATI Radeon HD 3800 Series (and others)
  165. 0x9505, // ATI Radeon HD 3800 Series (and others)
  166. 0x9507, // ATI Radeon HD 3830
  167. 0x9513, // ATI Radeon HD 3850 X2
  168. 0x950F, // ATI Radeon HD 3850 X2
  169. 0x0000
  170. };
  171. // List of PCI Device ID of nVidia cards with the macroblock width issue. More or less the VP3 block.
  172. // Per NVIDIA Accelerated Linux Graphics Driver, Appendix A Supported NVIDIA GPU Products, cards with note 1.
  173. static DWORD VP3DeviceID [] = {
  174. 0x06E0, // GeForce 9300 GE
  175. 0x06E1, // GeForce 9300 GS
  176. 0x06E2, // GeForce 8400
  177. 0x06E4, // GeForce 8400 GS
  178. 0x06E5, // GeForce 9300M GS
  179. 0x06E6, // GeForce G100
  180. 0x06E8, // GeForce 9200M GS
  181. 0x06E9, // GeForce 9300M GS
  182. 0x06EC, // GeForce G 105M
  183. 0x06EF, // GeForce G 103M
  184. 0x06F1, // GeForce G105M
  185. 0x0844, // GeForce 9100M G
  186. 0x0845, // GeForce 8200M G
  187. 0x0846, // GeForce 9200
  188. 0x0847, // GeForce 9100
  189. 0x0848, // GeForce 8300
  190. 0x0849, // GeForce 8200
  191. 0x084A, // nForce 730a
  192. 0x084B, // GeForce 9200
  193. 0x084C, // nForce 980a/780a SLI
  194. 0x084D, // nForce 750a SLI
  195. 0x0860, // GeForce 9400
  196. 0x0861, // GeForce 9400
  197. 0x0862, // GeForce 9400M G
  198. 0x0863, // GeForce 9400M
  199. 0x0864, // GeForce 9300
  200. 0x0865, // ION
  201. 0x0866, // GeForce 9400M G
  202. 0x0867, // GeForce 9400
  203. 0x0868, // nForce 760i SLI
  204. 0x086A, // GeForce 9400
  205. 0x086C, // GeForce 9300 / nForce 730i
  206. 0x086D, // GeForce 9200
  207. 0x086E, // GeForce 9100M G
  208. 0x086F, // GeForce 8200M G
  209. 0x0870, // GeForce 9400M
  210. 0x0871, // GeForce 9200
  211. 0x0872, // GeForce G102M
  212. 0x0873, // GeForce G102M
  213. 0x0874, // ION
  214. 0x0876, // ION
  215. 0x087A, // GeForce 9400
  216. 0x087D, // ION
  217. 0x087E, // ION LE
  218. 0x087F, // ION LE
  219. 0x0000
  220. };
  221. typedef struct {
  222. DWORD VendorID;
  223. DWORD DeviceID;
  224. } pci_device;
  225. // List of devices that drop frames with a deinterlacing processor for progressive material.
  226. static const pci_device NoDeintProcForProgDevices[] = {
  227. { PCIV_nVidia, 0x0865 }, // ION
  228. { PCIV_nVidia, 0x0874 }, // ION
  229. { PCIV_nVidia, 0x0876 }, // ION
  230. { PCIV_nVidia, 0x087D }, // ION
  231. { PCIV_nVidia, 0x087E }, // ION LE
  232. { PCIV_nVidia, 0x087F }, // ION LE
  233. { 0 , 0x0000 }
  234. };
  235. static CStdString GUIDToString(const GUID& guid)
  236. {
  237. CStdString buffer;
  238. buffer.Format("%08X-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"
  239. , guid.Data1, guid.Data2, guid.Data3
  240. , guid.Data4[0], guid.Data4[1]
  241. , guid.Data4[2], guid.Data4[3], guid.Data4[4]
  242. , guid.Data4[5], guid.Data4[6], guid.Data4[7]);
  243. return buffer;
  244. }
  245. static const dxva2_mode_t *dxva2_find_mode(const GUID *guid)
  246. {
  247. for (unsigned i = 0; dxva2_modes[i].name; i++) {
  248. if (IsEqualGUID(*dxva2_modes[i].guid, *guid))
  249. return &dxva2_modes[i];
  250. }
  251. return NULL;
  252. }
  253. static const dxva2_device_t *dxva2_find_device(const GUID *guid)
  254. {
  255. for (unsigned i = 0; dxva2_devices[i].name; i++) {
  256. if (IsEqualGUID(*dxva2_devices[i].guid, *guid))
  257. return &dxva2_devices[i];
  258. }
  259. return NULL;
  260. }
  261. static const dxva2_deinterlacetech_t *dxva2_find_deinterlacetech(unsigned flags)
  262. {
  263. for (unsigned i = 0; dxva2_deinterlacetechs[i].name; i++) {
  264. if (dxva2_deinterlacetechs[i].flags == flags)
  265. return &dxva2_deinterlacetechs[i];
  266. }
  267. return NULL;
  268. }
  // Declares `<var>_holder`, a boost::shared_ptr that hands `var` to
  // CoTaskMemFree when the holder goes out of scope. The expansion already ends
  // with ';'.
  #define SCOPE(type, var) \
    boost::shared_ptr<type> var##_holder(var, CoTaskMemFree);
  270. CSurfaceContext::CSurfaceContext()
  271. {
  272. }
  273. CSurfaceContext::~CSurfaceContext()
  274. {
  275. for (vector<IDirect3DSurface9*>::iterator it = m_heldsurfaces.begin(); it != m_heldsurfaces.end(); ++it)
  276. SAFE_RELEASE(*it);
  277. }
  278. void CSurfaceContext::HoldSurface(IDirect3DSurface9* surface)
  279. {
  280. surface->AddRef();
  281. m_heldsurfaces.push_back(surface);
  282. }
  283. CDecoder::SVideoBuffer::SVideoBuffer()
  284. {
  285. surface = NULL;
  286. Clear();
  287. }
  288. CDecoder::SVideoBuffer::~SVideoBuffer()
  289. {
  290. Clear();
  291. }
  292. void CDecoder::SVideoBuffer::Clear()
  293. {
  294. SAFE_RELEASE(surface);
  295. age = 0;
  296. used = 0;
  297. }
  298. CDecoder::CDecoder()
  299. : m_event(true)
  300. {
  301. m_event.Set();
  302. m_state = DXVA_OPEN;
  303. m_service = NULL;
  304. m_device = NULL;
  305. m_decoder = NULL;
  306. m_buffer_count = 0;
  307. m_buffer_age = 0;
  308. m_refs = 0;
  309. m_shared = 0;
  310. m_surface_context = NULL;
  311. memset(&m_format, 0, sizeof(m_format));
  312. m_context = (dxva_context*)calloc(1, sizeof(dxva_context));
  313. m_context->cfg = (DXVA2_ConfigPictureDecode*)calloc(1, sizeof(DXVA2_ConfigPictureDecode));
  314. m_context->surface = (IDirect3DSurface9**)calloc(m_buffer_max, sizeof(IDirect3DSurface9*));
  315. g_Windowing.Register(this);
  316. }
  317. CDecoder::~CDecoder()
  318. {
  319. g_Windowing.Unregister(this);
  320. Close();
  321. free(m_context->surface);
  322. free(const_cast<DXVA2_ConfigPictureDecode*>(m_context->cfg)); // yes this is foobar
  323. free(m_context);
  324. }
  325. void CDecoder::Close()
  326. {
  327. CSingleLock lock(m_section);
  328. SAFE_RELEASE(m_decoder);
  329. SAFE_RELEASE(m_service);
  330. SAFE_RELEASE(m_surface_context);
  331. for(unsigned i = 0; i < m_buffer_count; i++)
  332. m_buffer[i].Clear();
  333. m_buffer_count = 0;
  334. memset(&m_format, 0, sizeof(m_format));
  335. }
  // Evaluates the HRESULT expression `a` once; on failure logs the expression
  // text, the line number and the error code and makes the *enclosing function*
  // return false. Only usable inside functions returning bool.
  // The expansion ends with ';' on purpose: several call sites do not add one.
  #define CHECK(a) \
    do \
    { \
      const HRESULT check_result = (a); \
      if (FAILED(check_result)) \
      { \
        CLog::Log(LOGERROR, "DXVA - failed executing "#a" at line %d with error %x", __LINE__, check_result); \
        return false; \
      } \
    } while(0);
  345. static bool CheckH264L41(AVCodecContext *avctx)
  346. {
  347. unsigned widthmbs = (avctx->coded_width + 15) / 16; // width in macroblocks
  348. unsigned heightmbs = (avctx->coded_height + 15) / 16; // height in macroblocks
  349. unsigned maxdpbmbs = 32768; // Decoded Picture Buffer (DPB) capacity in macroblocks for L4.1
  350. return (avctx->refs * widthmbs * heightmbs <= maxdpbmbs);
  351. }
  352. static bool IsL41LimitedATI()
  353. {
  354. D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
  355. if(AIdentifier.VendorId == PCIV_ATI)
  356. {
  357. for (unsigned idx = 0; UVDDeviceID[idx] != 0; idx++)
  358. {
  359. if (UVDDeviceID[idx] == AIdentifier.DeviceId)
  360. return true;
  361. }
  362. }
  363. return false;
  364. }
  365. static bool HasVP3WidthBug(AVCodecContext *avctx)
  366. {
  367. // Some nVidia VP3 hardware cannot do certain macroblock widths
  368. D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
  369. if(AIdentifier.VendorId == PCIV_nVidia
  370. && !CDVDCodecUtils::IsVP3CompatibleWidth(avctx->coded_width))
  371. {
  372. // Find the card in a known list of problematic VP3 hardware
  373. for (unsigned idx = 0; VP3DeviceID[idx] != 0; idx++)
  374. if (VP3DeviceID[idx] == AIdentifier.DeviceId)
  375. return true;
  376. }
  377. return false;
  378. }
  379. static bool CheckCompatibility(AVCodecContext *avctx)
  380. {
  381. // The incompatibilities are all for H264
  382. if(avctx->codec_id != AV_CODEC_ID_H264)
  383. return true;
  384. // Macroblock width incompatibility
  385. if (HasVP3WidthBug(avctx))
  386. {
  387. CLog::Log(LOGWARNING,"DXVA - width %i is not supported with nVidia VP3 hardware. DXVA will not be used", avctx->coded_width);
  388. return false;
  389. }
  390. // Check for hardware limited to H264 L4.1 (ie Bluray).
  391. // No advanced settings: autodetect.
  392. // The advanced setting lets the user override the autodetection (in case of false positive or negative)
  393. bool checkcompat;
  394. if (!g_advancedSettings.m_DXVACheckCompatibilityPresent)
  395. checkcompat = IsL41LimitedATI(); // ATI UVD and UVD+ cards can only do L4.1 - corresponds roughly to series 3xxx
  396. else
  397. checkcompat = g_advancedSettings.m_DXVACheckCompatibility;
  398. if (checkcompat && !CheckH264L41(avctx))
  399. {
  400. CLog::Log(LOGWARNING, "DXVA - compatibility check: video exceeds L4.1. DXVA will not be used.");
  401. return false;
  402. }
  403. return true;
  404. }
  405. bool CDecoder::Open(AVCodecContext *avctx, enum PixelFormat fmt, unsigned int surfaces)
  406. {
  407. if (!CheckCompatibility(avctx))
  408. return false;
  409. if(!LoadDXVA())
  410. return false;
  411. CSingleLock lock(m_section);
  412. Close();
  413. if(m_state == DXVA_LOST)
  414. {
  415. CLog::Log(LOGDEBUG, "DXVA - device is in lost state, we can't start");
  416. return false;
  417. }
  418. CHECK(g_DXVA2CreateVideoService(g_Windowing.Get3DDevice(), IID_IDirectXVideoDecoderService, (void**)&m_service))
  419. UINT input_count;
  420. GUID *input_list;
  421. CHECK(m_service->GetDecoderDeviceGuids(&input_count, &input_list))
  422. SCOPE(GUID, input_list);
  423. for(unsigned i = 0; i < input_count; i++)
  424. {
  425. const GUID *g = &input_list[i];
  426. const dxva2_mode_t *mode = dxva2_find_mode(g);
  427. if(mode)
  428. CLog::Log(LOGDEBUG, "DXVA - supports '%s'", mode->name);
  429. else
  430. CLog::Log(LOGDEBUG, "DXVA - supports %s", GUIDToString(*g).c_str());
  431. }
  432. m_format.Format = D3DFMT_UNKNOWN;
  433. for(const dxva2_mode_t* mode = dxva2_modes; mode->name && m_format.Format == D3DFMT_UNKNOWN; mode++)
  434. {
  435. if(mode->codec != avctx->codec_id)
  436. continue;
  437. for(unsigned j = 0; j < input_count; j++)
  438. {
  439. if(!IsEqualGUID(input_list[j], *mode->guid))
  440. continue;
  441. CLog::Log(LOGDEBUG, "DXVA - trying '%s'", mode->name);
  442. if(OpenTarget(input_list[j]))
  443. break;
  444. }
  445. }
  446. if(m_format.Format == D3DFMT_UNKNOWN)
  447. {
  448. CLog::Log(LOGDEBUG, "DXVA - unable to find an input/output format combination");
  449. return false;
  450. }
  451. m_format.SampleWidth = avctx->coded_width;
  452. m_format.SampleHeight = avctx->coded_height;
  453. m_format.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
  454. m_format.SampleFormat.VideoLighting = DXVA2_VideoLighting_dim;
  455. if (avctx->color_range == AVCOL_RANGE_JPEG)
  456. m_format.SampleFormat.NominalRange = DXVA2_NominalRange_0_255;
  457. else if(avctx->color_range == AVCOL_RANGE_MPEG)
  458. m_format.SampleFormat.NominalRange = DXVA2_NominalRange_16_235;
  459. else
  460. m_format.SampleFormat.NominalRange = DXVA2_NominalRange_Unknown;
  461. switch(avctx->chroma_sample_location)
  462. {
  463. case AVCHROMA_LOC_LEFT:
  464. m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
  465. | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
  466. break;
  467. case AVCHROMA_LOC_CENTER:
  468. m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
  469. break;
  470. case AVCHROMA_LOC_TOPLEFT:
  471. m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
  472. | DXVA2_VideoChromaSubsampling_Vertically_Cosited;
  473. break;
  474. default:
  475. m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Unknown;
  476. }
  477. switch(avctx->colorspace)
  478. {
  479. case AVCOL_SPC_BT709:
  480. m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT709;
  481. break;
  482. case AVCOL_SPC_BT470BG:
  483. case AVCOL_SPC_SMPTE170M:
  484. m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT601;
  485. break;
  486. case AVCOL_SPC_SMPTE240M:
  487. m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_SMPTE240M;
  488. break;
  489. case AVCOL_SPC_FCC:
  490. case AVCOL_SPC_UNSPECIFIED:
  491. case AVCOL_SPC_RGB:
  492. default:
  493. m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_Unknown;
  494. }
  495. switch(avctx->color_primaries)
  496. {
  497. case AVCOL_PRI_BT709:
  498. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT709;
  499. break;
  500. case AVCOL_PRI_BT470M:
  501. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysM;
  502. break;
  503. case AVCOL_PRI_BT470BG:
  504. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysBG;
  505. break;
  506. case AVCOL_PRI_SMPTE170M:
  507. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE170M;
  508. break;
  509. case AVCOL_PRI_SMPTE240M:
  510. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE240M;
  511. break;
  512. case AVCOL_PRI_FILM:
  513. case AVCOL_PRI_UNSPECIFIED:
  514. default:
  515. m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_Unknown;
  516. }
  517. switch(avctx->color_trc)
  518. {
  519. case AVCOL_TRC_BT709:
  520. m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_709;
  521. break;
  522. case AVCOL_TRC_GAMMA22:
  523. m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_22;
  524. break;
  525. case AVCOL_TRC_GAMMA28:
  526. m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_28;
  527. break;
  528. default:
  529. m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_Unknown;
  530. }
  531. if (avctx->time_base.den > 0 && avctx->time_base.num > 0)
  532. {
  533. m_format.InputSampleFreq.Numerator = avctx->time_base.num;
  534. m_format.InputSampleFreq.Denominator = avctx->time_base.den;
  535. }
  536. m_format.OutputFrameFreq = m_format.InputSampleFreq;
  537. m_format.UABProtectionLevel = FALSE;
  538. m_format.Reserved = 0;
  539. if (surfaces > m_shared)
  540. m_shared = surfaces;
  541. if(avctx->refs > m_refs)
  542. m_refs = avctx->refs;
  543. if(m_refs == 0)
  544. {
  545. if(avctx->codec_id == AV_CODEC_ID_H264)
  546. m_refs = 16;
  547. else
  548. m_refs = 2;
  549. }
  550. CLog::Log(LOGDEBUG, "DXVA - source requires %d references", avctx->refs);
  551. // find what decode configs are available
  552. UINT cfg_count = 0;
  553. DXVA2_ConfigPictureDecode *cfg_list = NULL;
  554. CHECK(m_service->GetDecoderConfigurations(m_input
  555. , &m_format
  556. , NULL
  557. , &cfg_count
  558. , &cfg_list))
  559. SCOPE(DXVA2_ConfigPictureDecode, cfg_list);
  560. DXVA2_ConfigPictureDecode config = {};
  561. unsigned bitstream = 2; // ConfigBitstreamRaw = 2 is required for Poulsbo and handles skipping better with nVidia
  562. for(unsigned i = 0; i< cfg_count; i++)
  563. {
  564. CLog::Log(LOGDEBUG,
  565. "DXVA - config %d: bitstream type %d%s",
  566. i,
  567. cfg_list[i].ConfigBitstreamRaw,
  568. IsEqualGUID(cfg_list[i].guidConfigBitstreamEncryption, DXVA_NoEncrypt) ? "" : ", encrypted");
  569. // select first available
  570. if(config.ConfigBitstreamRaw == 0 && cfg_list[i].ConfigBitstreamRaw != 0)
  571. config = cfg_list[i];
  572. // overide with preferred if found
  573. if(config.ConfigBitstreamRaw != bitstream && cfg_list[i].ConfigBitstreamRaw == bitstream)
  574. config = cfg_list[i];
  575. }
  576. if(!config.ConfigBitstreamRaw)
  577. {
  578. CLog::Log(LOGDEBUG, "DXVA - failed to find a raw input bitstream");
  579. return false;
  580. }
  581. *const_cast<DXVA2_ConfigPictureDecode*>(m_context->cfg) = config;
  582. m_surface_context = new CSurfaceContext();
  583. if(!OpenDecoder())
  584. return false;
  585. avctx->get_buffer = GetBufferS;
  586. avctx->release_buffer = RelBufferS;
  587. avctx->hwaccel_context = m_context;
  588. if (IsL41LimitedATI())
  589. {
  590. #ifdef FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
  591. m_context->workaround |= FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG;
  592. #else
  593. CLog::Log(LOGWARNING, "DXVA - video card with different scaling list zigzag order detected, but no support in libavcodec");
  594. #endif
  595. }
  596. m_state = DXVA_OPEN;
  597. return true;
  598. }
  599. int CDecoder::Decode(AVCodecContext* avctx, AVFrame* frame)
  600. {
  601. CSingleLock lock(m_section);
  602. int result = Check(avctx);
  603. if(result)
  604. return result;
  605. if(frame)
  606. {
  607. for(unsigned i = 0; i < m_buffer_count; i++)
  608. {
  609. if(m_buffer[i].surface == (IDirect3DSurface9*)frame->data[3])
  610. return VC_BUFFER | VC_PICTURE;
  611. }
  612. CLog::Log(LOGWARNING, "DXVA - ignoring invalid surface");
  613. return VC_BUFFER;
  614. }
  615. else
  616. return 0;
  617. }
  618. bool CDecoder::GetPicture(AVCodecContext* avctx, AVFrame* frame, DVDVideoPicture* picture)
  619. {
  620. ((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetPictureCommon(picture);
  621. CSingleLock lock(m_section);
  622. picture->format = RENDER_FMT_DXVA;
  623. picture->extended_format = (unsigned int)m_format.Format;
  624. picture->context = m_surface_context;
  625. picture->data[3]= frame->data[3];
  626. return true;
  627. }
  628. int CDecoder::Check(AVCodecContext* avctx)
  629. {
  630. CSingleLock lock(m_section);
  631. if(m_state == DXVA_RESET)
  632. Close();
  633. if(m_state == DXVA_LOST)
  634. {
  635. Close();
  636. lock.Leave();
  637. m_event.WaitMSec(2000);
  638. lock.Enter();
  639. if(m_state == DXVA_LOST)
  640. {
  641. CLog::Log(LOGERROR, "CDecoder::Check - device didn't reset in reasonable time");
  642. return VC_ERROR;
  643. }
  644. }
  645. if(m_format.SampleWidth == 0
  646. || m_format.SampleHeight == 0)
  647. {
  648. if(!Open(avctx, avctx->pix_fmt, m_shared))
  649. {
  650. CLog::Log(LOGERROR, "CDecoder::Check - decoder was not able to reset");
  651. Close();
  652. return VC_ERROR;
  653. }
  654. return VC_FLUSHED;
  655. }
  656. else
  657. {
  658. if(avctx->refs > m_refs)
  659. {
  660. CLog::Log(LOGWARNING, "CDecoder::Check - number of required reference frames increased, recreating decoder");
  661. #if ALLOW_ADDING_SURFACES
  662. if(!OpenDecoder())
  663. return VC_ERROR;
  664. #else
  665. Close();
  666. return VC_FLUSHED;
  667. #endif
  668. }
  669. }
  670. // Status reports are available only for the DXVA2_ModeH264 and DXVA2_ModeVC1 modes
  671. if(avctx->codec_id != AV_CODEC_ID_H264
  672. && avctx->codec_id != AV_CODEC_ID_VC1
  673. && avctx->codec_id != AV_CODEC_ID_WMV3)
  674. return 0;
  675. DXVA2_DecodeExecuteParams params = {};
  676. DXVA2_DecodeExtensionData data = {};
  677. union {
  678. DXVA_Status_H264 h264;
  679. DXVA_Status_VC1 vc1;
  680. } status = {};
  681. params.pExtensionData = &data;
  682. data.Function = DXVA_STATUS_REPORTING_FUNCTION;
  683. data.pPrivateOutputData = &status;
  684. data.PrivateOutputDataSize = avctx->codec_id == AV_CODEC_ID_H264 ? sizeof(DXVA_Status_H264) : sizeof(DXVA_Status_VC1);
  685. HRESULT hr;
  686. if(FAILED( hr = m_decoder->Execute(&params)))
  687. {
  688. CLog::Log(LOGWARNING, "DXVA - failed to get decoder status - 0x%08X", hr);
  689. return VC_ERROR;
  690. }
  691. if(avctx->codec_id == AV_CODEC_ID_H264)
  692. {
  693. if(status.h264.bStatus)
  694. CLog::Log(LOGWARNING, "DXVA - decoder problem of status %d with %d", status.h264.bStatus, status.h264.bBufType);
  695. }
  696. else
  697. {
  698. if(status.vc1.bStatus)
  699. CLog::Log(LOGWARNING, "DXVA - decoder problem of status %d with %d", status.vc1.bStatus, status.vc1.bBufType);
  700. }
  701. return 0;
  702. }
  703. bool CDecoder::OpenTarget(const GUID &guid)
  704. {
  705. UINT output_count = 0;
  706. D3DFORMAT *output_list = NULL;
  707. CHECK(m_service->GetDecoderRenderTargets(guid, &output_count, &output_list))
  708. SCOPE(D3DFORMAT, output_list);
  709. for (unsigned i = 0; render_targets[i] != D3DFMT_UNKNOWN; i++)
  710. for(unsigned k = 0; k < output_count; k++)
  711. if (output_list[k] == render_targets[i])
  712. {
  713. m_input = guid;
  714. m_format.Format = output_list[k];
  715. return true;
  716. }
  717. return false;
  718. }
  719. bool CDecoder::OpenDecoder()
  720. {
  721. SAFE_RELEASE(m_decoder);
  722. m_context->decoder = NULL;
  723. m_context->surface_count = m_refs + 1 + 1 + m_shared; // refs + 1 decode + 1 libavcodec safety + processor buffer
  724. if(m_context->surface_count > m_buffer_count)
  725. {
  726. CLog::Log(LOGDEBUG, "DXVA - allocating %d surfaces", m_context->surface_count - m_buffer_count);
  727. CHECK(m_service->CreateSurface( (m_format.SampleWidth + 15) & ~15
  728. , (m_format.SampleHeight + 15) & ~15
  729. , m_context->surface_count - 1 - m_buffer_count
  730. , m_format.Format
  731. , D3DPOOL_DEFAULT
  732. , 0
  733. , DXVA2_VideoDecoderRenderTarget
  734. , m_context->surface + m_buffer_count, NULL ));
  735. for(unsigned i = m_buffer_count; i < m_context->surface_count; i++)
  736. {
  737. m_buffer[i].surface = m_context->surface[i];
  738. m_surface_context->HoldSurface(m_context->surface[i]);
  739. }
  740. m_buffer_count = m_context->surface_count;
  741. }
  742. CHECK(m_service->CreateVideoDecoder(m_input, &m_format
  743. , m_context->cfg
  744. , m_context->surface
  745. , m_context->surface_count
  746. , &m_decoder))
  747. m_context->decoder = m_decoder;
  748. return true;
  749. }
  750. bool CDecoder::Supports(enum PixelFormat fmt)
  751. {
  752. if(fmt == PIX_FMT_DXVA2_VLD)
  753. return true;
  754. return false;
  755. }
  756. void CDecoder::RelBuffer(AVCodecContext *avctx, AVFrame *pic)
  757. {
  758. CSingleLock lock(m_section);
  759. IDirect3DSurface9* surface = (IDirect3DSurface9*)pic->data[3];
  760. for(unsigned i = 0; i < m_buffer_count; i++)
  761. {
  762. if(m_buffer[i].surface == surface)
  763. {
  764. m_buffer[i].used = false;
  765. m_buffer[i].age = ++m_buffer_age;
  766. break;
  767. }
  768. }
  769. for(unsigned i = 0; i < 4; i++)
  770. pic->data[i] = NULL;
  771. }
  772. int CDecoder::GetBuffer(AVCodecContext *avctx, AVFrame *pic)
  773. {
  774. CSingleLock lock(m_section);
  775. if(avctx->coded_width != m_format.SampleWidth
  776. || avctx->coded_height != m_format.SampleHeight)
  777. {
  778. Close();
  779. if(!Open(avctx, avctx->pix_fmt, m_shared))
  780. {
  781. Close();
  782. return -1;
  783. }
  784. }
  785. int count = 0;
  786. SVideoBuffer* buf = NULL;
  787. for(unsigned i = 0; i < m_buffer_count; i++)
  788. {
  789. if(m_buffer[i].used)
  790. count++;
  791. else
  792. {
  793. if(!buf || buf->age > m_buffer[i].age)
  794. buf = m_buffer+i;
  795. }
  796. }
  797. if(count >= m_refs+2)
  798. {
  799. m_refs++;
  800. #if ALLOW_ADDING_SURFACES
  801. if(!OpenDecoder())
  802. return -1;
  803. return GetBuffer(avctx, pic);
  804. #else
  805. Close();
  806. return -1;
  807. #endif
  808. }
  809. if(!buf)
  810. {
  811. CLog::Log(LOGERROR, "DXVA - unable to find new unused buffer");
  812. return -1;
  813. }
  814. pic->reordered_opaque = avctx->reordered_opaque;
  815. pic->type = FF_BUFFER_TYPE_USER;
  816. for(unsigned i = 0; i < 4; i++)
  817. {
  818. pic->data[i] = NULL;
  819. pic->linesize[i] = 0;
  820. }
  821. pic->data[0] = (uint8_t*)buf->surface;
  822. pic->data[3] = (uint8_t*)buf->surface;
  823. buf->used = true;
  824. return 0;
  825. }
  826. unsigned CDecoder::GetAllowedReferences()
  827. {
  828. return m_shared;
  829. }
  830. //---------------------------------------------------------------------------
  831. //---------------------------------------------------------------------------
  832. //------------------------ PROCESSING SERVICE -------------------------------
  833. //---------------------------------------------------------------------------
  834. //---------------------------------------------------------------------------
  835. CProcessor::CProcessor()
  836. {
  837. m_service = NULL;
  838. m_process = NULL;
  839. m_time = 0;
  840. g_Windowing.Register(this);
  841. m_surfaces = NULL;
  842. m_context = NULL;
  843. m_index = 0;
  844. m_progressive = true;
  845. }
  846. CProcessor::~CProcessor()
  847. {
  848. g_Windowing.Unregister(this);
  849. UnInit();
  850. }
  851. void CProcessor::UnInit()
  852. {
  853. CSingleLock lock(m_section);
  854. Close();
  855. SAFE_RELEASE(m_service);
  856. }
  857. void CProcessor::Close()
  858. {
  859. CSingleLock lock(m_section);
  860. SAFE_RELEASE(m_process);
  861. for(unsigned i = 0; i < m_sample.size(); i++)
  862. {
  863. SAFE_RELEASE(m_sample[i].context);
  864. SAFE_RELEASE(m_sample[i].sample.SrcSurface);
  865. }
  866. m_sample.clear();
  867. SAFE_RELEASE(m_context);
  868. if (m_surfaces)
  869. {
  870. for (unsigned i = 0; i < m_size; i++)
  871. SAFE_RELEASE(m_surfaces[i]);
  872. free(m_surfaces);
  873. m_surfaces = NULL;
  874. }
  875. }
  876. bool CProcessor::UpdateSize(const DXVA2_VideoDesc& dsc)
  877. {
  878. // TODO: print the D3FORMAT text version in log
  879. CLog::Log(LOGDEBUG, "DXVA - cheking samples array size using %d render target", dsc.Format);
  880. GUID* deint_guid_list = NULL;
  881. unsigned guid_count = 0;
  882. if (FAILED(m_service->GetVideoProcessorDeviceGuids(&dsc, &guid_count, &deint_guid_list)))
  883. return false;
  884. SCOPE(GUID, deint_guid_list);
  885. for (unsigned i = 0; i < guid_count; i++)
  886. {
  887. DXVA2_VideoProcessorCaps caps;
  888. CHECK(m_service->GetVideoProcessorCaps(deint_guid_list[i], &dsc, D3DFMT_X8R8G8B8, &caps));
  889. if (caps.NumBackwardRefSamples + caps.NumForwardRefSamples > m_size)
  890. {
  891. m_size = caps.NumBackwardRefSamples + caps.NumForwardRefSamples;
  892. CLog::Log(LOGDEBUG, "DXVA - updated maximum samples count to %d", m_size);
  893. }
  894. m_max_back_refs = std::max(caps.NumBackwardRefSamples, m_max_back_refs);
  895. m_max_fwd_refs = std::max(caps.NumForwardRefSamples, m_max_fwd_refs);
  896. }
  897. return true;
  898. }
  899. bool CProcessor::PreInit()
  900. {
  901. if (!LoadDXVA())
  902. return false;
  903. UnInit();
  904. CSingleLock lock(m_section);
  905. if (FAILED(g_DXVA2CreateVideoService(g_Windowing.Get3DDevice(), IID_IDirectXVideoProcessorService, (void**)&m_service)))
  906. return false;
  907. m_size = 0;
  908. // We try to find the maximum count of reference frames using a standard resolution and all known render target formats
  909. DXVA2_VideoDesc dsc = {};
  910. dsc.SampleWidth = 640;
  911. dsc.SampleHeight = 480;
  912. dsc.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedOddFirst;
  913. m_max_back_refs = 0;
  914. m_max_fwd_refs = 0;
  915. for (unsigned i = 0; render_targets[i] != D3DFMT_UNKNOWN; i++)
  916. {
  917. dsc.Format = render_targets[i];
  918. if (!UpdateSize(dsc))
  919. CLog::Log(LOGDEBUG, "DXVA - render target not supported by processor");
  920. }
  921. m_size = m_max_back_refs + 1 + m_max_fwd_refs + 2; // refs + 1 display + 2 safety frames
  922. return true;
  923. }
  924. bool CProcessor::Open(UINT width, UINT height, unsigned int flags, unsigned int format, unsigned int extended_format)
  925. {
  926. Close();
  927. CSingleLock lock(m_section);
  928. if (!m_service)
  929. return false;
  930. DXVA2_VideoDesc dsc;
  931. memset(&dsc, 0, sizeof(DXVA2_VideoDesc));
  932. dsc.SampleWidth = width;
  933. dsc.SampleHeight = height;
  934. dsc.SampleFormat.VideoLighting = DXVA2_VideoLighting_dim;
  935. switch (CONF_FLAGS_CHROMA_MASK(flags))
  936. {
  937. case CONF_FLAGS_CHROMA_LEFT:
  938. dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
  939. | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
  940. break;
  941. case CONF_FLAGS_CHROMA_CENTER:
  942. dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
  943. break;
  944. case CONF_FLAGS_CHROMA_TOPLEFT:
  945. dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
  946. | DXVA2_VideoChromaSubsampling_Vertically_Cosited;
  947. break;
  948. default:
  949. dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Unknown;
  950. }
  951. if (flags & CONF_FLAGS_YUV_FULLRANGE)
  952. dsc.SampleFormat.NominalRange = DXVA2_NominalRange_0_255;
  953. else
  954. dsc.SampleFormat.NominalRange = DXVA2_NominalRange_16_235;
  955. switch (CONF_FLAGS_YUVCOEF_MASK(flags))
  956. {
  957. case CONF_FLAGS_YUVCOEF_240M:
  958. dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_SMPTE240M;
  959. break;
  960. case CONF_FLAGS_YUVCOEF_BT601:
  961. dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT601;
  962. break;
  963. case CONF_FLAGS_YUVCOEF_BT709:
  964. dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT709;
  965. break;
  966. default:
  967. dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_Unknown;
  968. }
  969. switch (CONF_FLAGS_COLPRI_MASK(flags))
  970. {
  971. case CONF_FLAGS_COLPRI_BT709:
  972. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT709;
  973. break;
  974. case CONF_FLAGS_COLPRI_BT470M:
  975. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysM;
  976. break;
  977. case CONF_FLAGS_COLPRI_BT470BG:
  978. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysBG;
  979. break;
  980. case CONF_FLAGS_COLPRI_170M:
  981. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE170M;
  982. break;
  983. case CONF_FLAGS_COLPRI_240M:
  984. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE240M;
  985. break;
  986. default:
  987. dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_Unknown;
  988. }
  989. switch (CONF_FLAGS_TRC_MASK(flags))
  990. {
  991. case CONF_FLAGS_TRC_BT709:
  992. dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_709;
  993. break;
  994. case CONF_FLAGS_TRC_GAMMA22:
  995. dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_22;
  996. break;
  997. case CONF_FLAGS_TRC_GAMMA28:
  998. dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_28;
  999. break;
  1000. default:
  1001. dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_Unknown;
  1002. }
  1003. m_desc = dsc;
  1004. if (format == RENDER_FMT_DXVA)
  1005. m_desc.Format = (D3DFORMAT)extended_format;
  1006. else
  1007. {
  1008. // Only NV12 software colorspace conversion is implemented for now
  1009. m_desc.Format = (D3DFORMAT)MAKEFOURCC('N','V','1','2');
  1010. if (!CreateSurfaces())
  1011. return false;
  1012. }
  1013. // frame flags are not available to do the complete calculation of the deinterlacing mode, as done in Render()
  1014. // It's OK, as it doesn't make any difference for all hardware except the few GPUs on the quirk list.
  1015. // And for those GPUs, the correct values will be calculated with the first Render() and the correct processor
  1016. // will replace the one allocated here, before the user sees anything.
  1017. // It's a bit inefficient, that's all.
  1018. m_deinterlace_mode = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
  1019. m_interlace_method = g_renderManager.AutoInterlaceMethod(CMediaSettings::Get().GetCurrentVideoSettings().m_InterlaceMethod);;
  1020. EvaluateQuirkNoDeintProcForProg();
  1021. if (g_advancedSettings.m_DXVANoDeintProcForProgressive || m_quirk_nodeintprocforprog)
  1022. CLog::Log(LOGNOTICE, "DXVA: Auto deinterlacing mode workaround activated. Deinterlacing processor will be used only for interlaced frames.");
  1023. if (!OpenProcessor())
  1024. return false;
  1025. m_time = 0;
  1026. return true;
  1027. }
  1028. void CProcessor::EvaluateQuirkNoDeintProcForProg()
  1029. {
  1030. D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
  1031. for (unsigned idx = 0; NoDeintProcForProgDevices[idx].VendorID != 0; idx++)
  1032. {
  1033. if(NoDeintProcForProgDevices[idx].VendorID == AIdentifier.VendorId
  1034. && NoDeintProcForProgDevices[idx].DeviceID == AIdentifier.DeviceId)
  1035. {
  1036. m_quirk_nodeintprocforprog = true;
  1037. return;
  1038. }
  1039. }
  1040. m_quirk_nodeintprocforprog = false;
  1041. }
  1042. bool CProcessor::SelectProcessor()
  1043. {
  1044. // The CProcessor can be run after dxva or software decoding, possibly after software deinterlacing.
  1045. // Deinterlace mode off: force progressive
  1046. // Deinterlace mode auto or force, with a dxva deinterlacing method: create an deinterlacing capable processor. The frame flags will tell it to deinterlace or not.
  1047. m_progressive = m_deinterlace_mode == VS_DEINTERLACEMODE_OFF
  1048. || ( m_interlace_method != VS_INTERLACEMETHOD_DXVA_BOB
  1049. && m_interlace_method != VS_INTERLACEMETHOD_DXVA_BEST);
  1050. if (m_progressive)
  1051. m_desc.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
  1052. else
  1053. m_desc.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedEvenFirst;
  1054. GUID* guid_list;
  1055. unsigned guid_count;
  1056. CHECK(m_service->GetVideoProcessorDeviceGuids(&m_desc, &guid_count, &guid_list));
  1057. SCOPE(GUID, guid_list);
  1058. if(guid_count == 0)
  1059. {
  1060. CLog::Log(LOGDEBUG, "DXVA - unable to find any processors");
  1061. return false;
  1062. }
  1063. for(unsigned i = 0; i < guid_count; i++)
  1064. {
  1065. const GUID* g = &guid_list[i];
  1066. const dxva2_device_t* device = dxva2_find_device(g);
  1067. if (device)
  1068. {
  1069. CLog::Log(LOGDEBUG, "DXVA - processor found %s", device->name);
  1070. }
  1071. else
  1072. {
  1073. CHECK(m_service->GetVideoProcessorCaps(*g, &m_desc, D3DFMT_X8R8G8B8, &m_caps));
  1074. const dxva2_deinterlacetech_t* tech = dxva2_find_deinterlacetech(m_caps.DeinterlaceTechnology);
  1075. if (tech != NULL)
  1076. CLog::Log(LOGDEBUG, "DXVA - unknown processor %s found, deinterlace technology %s", GUIDToString(*g).c_str(), tech->name);
  1077. else
  1078. CLog::Log(LOGDEBUG, "DXVA - unknown processor %s found, unknown technology", GUIDToString(*g).c_str());
  1079. }
  1080. }
  1081. if (m_progressive)
  1082. m_device = DXVA2_VideoProcProgressiveDevice;
  1083. else if(m_interlace_method == VS_INTERLACEMETHOD_DXVA_BEST)
  1084. m_device = guid_list[0];
  1085. else
  1086. m_device = DXVA2_VideoProcBobDevice;
  1087. return true;
  1088. }
  1089. bool CProcessor::OpenProcessor()
  1090. {
  1091. if (!SelectProcessor())
  1092. return false;
  1093. SAFE_RELEASE(m_process);
  1094. const dxva2_device_t* device = dxva2_find_device(&m_device);
  1095. if (device)
  1096. CLog::Log(LOGDEBUG, "DXVA - processor selected %s", device->name);
  1097. else
  1098. CLog::Log(LOGDEBUG, "DXVA - processor selected %s", GUIDToString(m_device).c_str());
  1099. D3DFORMAT rtFormat = D3DFMT_X8R8G8B8;
  1100. CHECK(m_service->GetVideoProcessorCaps(m_device, &m_desc, rtFormat, &m_caps))
  1101. /* HACK for Intel Egde Device.
  1102. * won't work if backward refs is equals value from the capabilities *
  1103. * Possible reasons are: *
  1104. * 1) The device capabilities are incorrectly reported *
  1105. * 2) The device is broken */
  1106. if (IsEqualGUID(m_device, DXVA2_VideoProcIntelEdgeDevice))
  1107. m_caps.NumBackwardRefSamples = 0;
  1108. if (m_caps.DeviceCaps & DXVA2_VPDev_SoftwareDevice)
  1109. CLog::Log(LOGDEBUG, "DXVA - processor is software device");
  1110. if (m_caps.DeviceCaps & DXVA2_VPDev_EmulatedDXVA1)
  1111. CLog::Log(LOGDEBUG, "DXVA - processor is emulated dxva1");
  1112. CLog::Log(LOGDEBUG, "DXVA - processor requires %d past frames and %d future frames", m_caps.NumBackwardRefSamples, m_caps.NumForwardRefSamples);
  1113. if (m_caps.NumBackwardRefSamples + m_caps.NumForwardRefSamples + 3 > m_size)
  1114. {
  1115. CLog::Log(LOGERROR, "DXVA - used an incorrect number of reference frames creating processor");
  1116. return false;
  1117. }
  1118. CHECK(m_service->CreateVideoProcessor(m_device, &m_desc, rtFormat, 0, &m_process));
  1119. CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Brightness, &m_brightness));
  1120. CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Contrast , &m_contrast));
  1121. CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Hue , &m_hue));
  1122. CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Saturation, &m_saturation));
  1123. return true;
  1124. }
  1125. bool CProcessor::CreateSurfaces()
  1126. {
  1127. LPDIRECT3DDEVICE9 pD3DDevice = g_Windowing.Get3DDevice();
  1128. m_surfaces = (LPDIRECT3DSURFACE9*)calloc(m_size, sizeof(LPDIRECT3DSURFACE9));
  1129. for (unsigned idx = 0; idx < m_size; idx++)
  1130. CHECK(pD3DDevice->CreateOffscreenPlainSurface(
  1131. (m_desc.SampleWidth + 15) & ~15,
  1132. (m_desc.SampleHeight + 15) & ~15,
  1133. m_desc.Format,
  1134. D3DPOOL_DEFAULT,
  1135. &m_surfaces[idx],
  1136. NULL));
  1137. m_context = new CSurfaceContext();
  1138. return true;
  1139. }
  1140. REFERENCE_TIME CProcessor::Add(DVDVideoPicture* picture)
  1141. {
  1142. CSingleLock lock(m_section);
  1143. IDirect3DSurface9* surface = NULL;
  1144. CSurfaceContext* context = NULL;
  1145. if (picture->iFlags & DVP_FLAG_DROPPED)
  1146. return 0;
  1147. switch (picture->format)
  1148. {
  1149. case RENDER_FMT_DXVA:
  1150. {
  1151. surface = (IDirect3DSurface9*)picture->data[3];
  1152. context = picture->context;
  1153. break;
  1154. }
  1155. case RENDER_FMT_YUV420P:
  1156. {
  1157. surface = m_surfaces[m_index];
  1158. m_index = (m_index + 1) % m_size;
  1159. context = m_context;
  1160. D3DLOCKED_RECT rectangle;
  1161. if (FAILED(surface->LockRect(&rectangle, NULL, 0)))
  1162. return 0;
  1163. // Convert to NV12 - Luma
  1164. // TODO: Optimize this later using shaders/swscale/etc.
  1165. uint8_t *s = picture->data[0];
  1166. uint8_t* bits = (uint8_t*)(rectangle.pBits);
  1167. for (unsigned y = 0; y < picture->iHeight; y++)
  1168. {
  1169. memcpy(bits, s, picture->iWidth);
  1170. s += picture->iLineSize[0];
  1171. bits += rectangle.Pitch;
  1172. }
  1173. D3DSURFACE_DESC desc;
  1174. if (FAILED(surface->GetDesc(&desc)))
  1175. return 0;
  1176. // Convert to NV12 - Chroma
  1177. for (unsigned y = 0; y < picture->iHeight/2; y++)
  1178. {
  1179. uint8_t *s_u = picture->data[1] + (y * picture->iLineSize[1]);
  1180. uint8_t *s_v = picture->data[2] + (y * picture->iLineSize[2]);
  1181. uint8_t *d_uv = ((uint8_t*)(rectangle.pBits)) + (desc.Height + y) * rectangle.Pitch;
  1182. for (unsigned x = 0; x < picture->iWidth/2; x++)
  1183. {
  1184. *d_uv++ = *s_u++;
  1185. *d_uv++ = *s_v++;
  1186. }
  1187. }
  1188. if (FAILED(surface->UnlockRect()))
  1189. return 0;
  1190. break;
  1191. }
  1192. default:
  1193. {
  1194. CLog::Log(LOGWARNING, "DXVA - colorspace not supported by processor, skipping frame");
  1195. return 0;
  1196. }
  1197. }
  1198. if (!surface || !context)
  1199. return 0;
  1200. m_time += 2;
  1201. surface->AddRef();
  1202. context->Acquire();
  1203. SVideoSample vs = {};
  1204. vs.sample.Start = m_time;
  1205. vs.sample.End = 0;
  1206. vs.sample.SampleFormat = m_desc.SampleFormat;
  1207. if (picture->iFlags & DVP_FLAG_INTERLACED)
  1208. {
  1209. if (picture->iFlags & DVP_FLAG_TOP_FIELD_FIRST)
  1210. vs.sample.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedEvenFirst;
  1211. else
  1212. vs.sample.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedOddFirst;
  1213. }
  1214. else
  1215. {
  1216. vs.sample.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
  1217. }
  1218. vs.sample.PlanarAlpha = DXVA2_Fixed32OpaqueAlpha();
  1219. vs.sample.SampleData = 0;
  1220. vs.sample.SrcSurface = surface;
  1221. vs.context = context;
  1222. if(!m_sample.empty())
  1223. m_sample.back().sample.End = vs.sample.Start;
  1224. m_sample.push_back(vs);
  1225. if (m_sample.size() > m_size)
  1226. {
  1227. SAFE_RELEASE(m_sample.front().context);
  1228. SAFE_RELEASE(m_sample.front().sample.SrcSurface);
  1229. m_sample.pop_front();
  1230. }
  1231. return m_time;
  1232. }
  1233. static DXVA2_Fixed32 ConvertRange(const DXVA2_ValueRange& range, int value, int min, int max, int def)
  1234. {
  1235. if(value > def)
  1236. return DXVA2FloatToFixed( DXVA2FixedToFloat(range.DefaultValue)
  1237. + (DXVA2FixedToFloat(range.MaxValue) - DXVA2FixedToFloat(range.DefaultValue))
  1238. * (value - def) / (max - def) );
  1239. else if(value < def)
  1240. return DXVA2FloatToFixed( DXVA2FixedToFloat(range.DefaultValue)
  1241. + (DXVA2FixedToFloat(range.MinValue) - DXVA2FixedToFloat(range.DefaultValue))
  1242. * (value - def) / (min - def) );
  1243. else
  1244. return range.DefaultValue;
  1245. }
  1246. bool CProcessor::Render(CRect src, CRect dst, IDirect3DSurface9* target, REFERENCE_TIME time, DWORD flags)
  1247. {
  1248. CSingleLock lock(m_section);
  1249. // With auto deinterlacing, the Ion Gen. 1 drops some frames with deinterlacing processor + progressive flags for progressive material.
  1250. // For that GPU (or when specified by an advanced setting), use the progressive processor.
  1251. // This is at the expense of the switch speed when video interlacing flags change and a deinterlacing processor is actually required.
  1252. EDEINTERLACEMODE mode = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
  1253. if (g_advancedSettings.m_DXVANoDeintProcForProgressive || m_quirk_nodeintprocforprog)
  1254. mode = (flags & RENDER_FLAG_FIELD0 || flags & RENDER_FLAG_FIELD1) ? VS_DEINTERLACEMODE_FORCE : VS_DEINTERLACEMODE_OFF;
  1255. EINTERLACEMETHOD method = g_renderManager.AutoInterlaceMethod(CMediaSettings::Get().GetCurrentVideoSettings().m_InterlaceMethod);
  1256. if(m_interlace_method != method
  1257. || m_deinterlace_mode != mode
  1258. || !m_process)
  1259. {
  1260. m_deinterlace_mode = mode;
  1261. m_interlace_method = method;
  1262. if (!OpenProcessor())
  1263. return false;
  1264. }
  1265. // MinTime and MaxTime are the first and last samples to keep. Delete the rest.
  1266. REFERENCE_TIME MinTime = time - m_max_back_refs*2;
  1267. REFERENCE_TIME MaxTime = time + m_max_fwd_refs*2;
  1268. SSamples::iterator it = m_sample.begin();
  1269. while (it != m_sample.end())
  1270. {
  1271. if (it->sample.Start < MinTime)
  1272. {
  1273. SAFE_RELEASE(it->context);
  1274. SAFE_RELEASE(it->sample.SrcSurface);
  1275. it = m_sample.erase(it);
  1276. }
  1277. else
  1278. ++it;
  1279. }
  1280. if(m_sample.empty())
  1281. return false;
  1282. // MinTime and MaxTime are now the first and last samples to feed the processor.
  1283. MinTime = time - m_caps.NumBackwardRefSamples*2;
  1284. MaxTime = time + m_caps.NumForwardRefSamples*2;
  1285. D3DSURFACE_DESC desc;
  1286. CHECK(target->GetDesc(&desc));
  1287. CRect rectTarget(0, 0, desc.Width, desc.Height);
  1288. CWIN32Util::CropSource(src, dst, rectTarget);
  1289. RECT sourceRECT = { src.x1, src.y1, src.x2, src.y2 };
  1290. RECT dstRECT = { dst.x1, dst.y1, dst.x2, dst.y2 };
  1291. // How to prepare the samples array for VideoProcessBlt
  1292. // - always provide current picture + the number of forward and backward references required by the current processor.
  1293. // - provide the surfaces in the array in increasing temporal order
  1294. // - at the start of playback, there may not be enough samples available. Use SampleFormat.SampleFormat = DXVA2_SampleUnknown for the missing samples.
  1295. int count = 1 + m_caps.NumBackwardRefSamples + m_caps.NumForwardRefSamples;
  1296. int valid = 0;
  1297. auto_aptr<DXVA2_VideoSample> samp(new DXVA2_VideoSample[count]);
  1298. for (int i = 0; i < count; i++)
  1299. samp[i].SampleFormat.SampleFormat = DXVA2_SampleUnknown;
  1300. for(it = m_sample.begin(); it != m_sample.end() && valid < count; ++it)
  1301. {
  1302. if (it->sample.Start >= MinTime && it->sample.Start <= MaxTime)
  1303. {
  1304. DXVA2_VideoSample& vs = samp[(it->sample.Start - MinTime) / 2];
  1305. vs = it->sample;
  1306. vs.SrcRect = sourceRECT;
  1307. vs.DstRect = dstRECT;
  1308. if(vs.End == 0)
  1309. vs.End = vs.Start + 2;
  1310. // Override the sample format when the processor doesn't need to deinterlace or when deinterlacing is forced and flags are missing.
  1311. if (m_progressive)
  1312. vs.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
  1313. else if (m_deinterlace_mode == VS_DEINTERLACEMODE_FORCE && vs.SampleFormat.SampleFormat == DXVA2_SampleProgressiveFrame)
  1314. vs.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedEvenFirst;
  1315. valid++;
  1316. }
  1317. }
  1318. // MS' guidelines above don't work. The blit fails when the processor is given DXVA2_SampleUnknown samples (with ATI at least).
  1319. // The ATI driver works with a reduced number of samples though, support that for now.
  1320. // Problem is an ambiguity if there are future refs requested by the processor. There are no such implementations at the moment.
  1321. int offset = 0;
  1322. if(valid < count)
  1323. {
  1324. CLog::Log(LOGWARNING, __FUNCTION__" - did not find all required samples, adjusting the sample array.");
  1325. for (int i = 0; i < count; i++)
  1326. {
  1327. if (samp[i].SampleFormat.SampleFormat == DXVA2_SampleUnknown)
  1328. offset = i+1;
  1329. }
  1330. count -= offset;
  1331. if (count == 0)
  1332. {
  1333. CLog::Log(LOGWARNING, __FUNCTION__" - no usable samples.");
  1334. return false;
  1335. }
  1336. }
  1337. DXVA2_VideoProcessBltParams blt = {};
  1338. blt.TargetFrame = time;
  1339. if (flags & RENDER_FLAG_FIELD1)
  1340. blt.TargetFrame += 1;
  1341. blt.TargetRect = dstRECT;
  1342. blt.ConstrictionSize.cx = 0;
  1343. blt.ConstrictionSize.cy = 0;
  1344. blt.DestFormat.VideoTransferFunction = DXVA2_VideoTransFunc_sRGB;
  1345. blt.DestFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
  1346. if(g_Windowing.UseLimitedColor())
  1347. blt.DestFormat.NominalRange = DXVA2_NominalRange_16_235;
  1348. else
  1349. blt.DestFormat.NominalRange = DXVA2_NominalRange_0_255;
  1350. blt.Alpha = DXVA2_Fixed32OpaqueAlpha();
  1351. blt.ProcAmpValues.Brightness = ConvertRange( m_brightness, CMediaSettings::Get().GetCurrentVideoSettings().m_Brightness
  1352. , 0, 100, 50);
  1353. blt.ProcAmpValues.Contrast = ConvertRange( m_contrast, CMediaSettings::Get().GetCurrentVideoSettings().m_Contrast
  1354. , 0, 100, 50);
  1355. blt.ProcAmpValues.Hue = m_hue.DefaultValue;
  1356. blt.ProcAmpValues.Saturation = m_saturation.DefaultValue;
  1357. blt.BackgroundColor.Y = 0x1000;
  1358. blt.BackgroundColor.Cb = 0x8000;
  1359. blt.BackgroundColor.Cr = 0x8000;
  1360. blt.BackgroundColor.Alpha = 0xffff;
  1361. /* HACK to kickstart certain DXVA drivers (poulsbo) which oddly *
  1362. * won't render anything until someting else have been rendered. */
  1363. g_Windowing.Get3DDevice()->SetFVF( D3DFVF_XYZ );
  1364. float verts[2][3]= {};
  1365. g_Windowing.Get3DDevice()->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 1, verts, 3*sizeof(float));
  1366. CHECK(m_process->VideoProcessBlt(target, &blt, &samp[offset], count, NULL));
  1367. return true;
  1368. }
  1369. #endif