PageRenderTime 176ms CodeModel.GetById 44ms app.highlight 121ms RepoModel.GetById 1ms app.codeStats 0ms

/xbmc/cores/dvdplayer/DVDCodecs/Video/DXVA.cpp

http://github.com/xbmc/xbmc
C++ | 1618 lines | 1290 code | 249 blank | 79 comment | 237 complexity | 3103d0a459322864d14551b3fc768147 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/*
   2 *      Copyright (C) 2005-2013 Team XBMC
   3 *      http://xbmc.org
   4 *
   5 *  This Program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2, or (at your option)
   8 *  any later version.
   9 *
  10 *  This Program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with XBMC; see the file COPYING.  If not, see
  17 *  <http://www.gnu.org/licenses/>.
  18 *
  19 */
  20
  21#ifdef HAS_DX
  22
  23// setting that here because otherwise SampleFormat is defined to AVSampleFormat
  24// which we don't use here
  25#define FF_API_OLD_SAMPLE_FMT 0
  26
  27#include <windows.h>
  28#include <d3d9.h>
  29#include <Initguid.h>
  30#include <dxva.h>
  31#include <dxva2api.h>
  32#include "libavcodec/dxva2.h"
  33#include "../DVDCodecUtils.h"
  34
  35#include "DXVA.h"
  36#include "windowing/WindowingFactory.h"
  37#include "../../../VideoRenderers/WinRenderer.h"
  38#include "settings/Settings.h"
  39#include "settings/MediaSettings.h"
  40#include "boost/shared_ptr.hpp"
  41#include "utils/AutoPtrHandle.h"
  42#include "settings/AdvancedSettings.h"
  43#include "settings/MediaSettings.h"
  44#include "cores/VideoRenderers/RenderManager.h"
  45#include "win32/WIN32Util.h"
  46
  47#define ALLOW_ADDING_SURFACES 0
  48
  49using namespace DXVA;
  50using namespace AUTOPTR;
  51using namespace std;
  52
  53typedef HRESULT (__stdcall *DXVA2CreateVideoServicePtr)(IDirect3DDevice9* pDD, REFIID riid, void** ppService);
  54static DXVA2CreateVideoServicePtr g_DXVA2CreateVideoService;
  55
  56static bool LoadDXVA()
  57{
  58  static CCriticalSection g_section;
  59  static HMODULE          g_handle;
  60
  61  CSingleLock lock(g_section);
  62  if(g_handle == NULL)
  63    g_handle = LoadLibraryEx("dxva2.dll", NULL, 0);
  64  if(g_handle == NULL)
  65    return false;
  66  g_DXVA2CreateVideoService = (DXVA2CreateVideoServicePtr)GetProcAddress(g_handle, "DXVA2CreateVideoService");
  67  if(g_DXVA2CreateVideoService == NULL)
  68    return false;
  69  return true;
  70}
  71
  72
  73
  74static void RelBufferS(AVCodecContext *avctx, AVFrame *pic)
  75{ ((CDecoder*)((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetHardware())->RelBuffer(avctx, pic); }
  76
  77static int GetBufferS(AVCodecContext *avctx, AVFrame *pic) 
  78{  return ((CDecoder*)((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetHardware())->GetBuffer(avctx, pic); }
  79
  80
  81DEFINE_GUID(DXVADDI_Intel_ModeH264_A, 0x604F8E64,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  82DEFINE_GUID(DXVADDI_Intel_ModeH264_C, 0x604F8E66,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  83DEFINE_GUID(DXVADDI_Intel_ModeH264_E, 0x604F8E68,0x4951,0x4c54,0x88,0xFE,0xAB,0xD2,0x5C,0x15,0xB3,0xD6);
  84DEFINE_GUID(DXVADDI_Intel_ModeVC1_E , 0xBCC5DB6D,0xA2B6,0x4AF0,0xAC,0xE4,0xAD,0xB1,0xF7,0x87,0xBC,0x89);
  85
  86#if _MSC_VER < 1700
  87DEFINE_GUID(DXVA_ModeMPEG2and1_VLD,   0x86695f12,0x340e,0x4f04,0x9f,0xd3,0x92,0x53,0xdd,0x32,0x74,0x60);
  88// When exposed by an accelerator, indicates compliance with the August 2010 spec update
  89DEFINE_GUID(DXVA_ModeVC1_D2010,       0x1b81beA4,0xa0c7,0x11d3,0xb9,0x84,0x00,0xc0,0x4f,0x2e,0x73,0xc5);
  90#endif
  91
  92typedef struct {
  93    const char   *name;
  94    const GUID   *guid;
  95    int          codec;
  96} dxva2_mode_t;
  97
  98/* XXX Prefered modes must come first */
  99static const dxva2_mode_t dxva2_modes[] = {
 100    { "MPEG2 VLD",    &DXVA2_ModeMPEG2_VLD,     AV_CODEC_ID_MPEG2VIDEO },
 101    { "MPEG1/2 VLD",  &DXVA_ModeMPEG2and1_VLD,  AV_CODEC_ID_MPEG2VIDEO },
 102    { "MPEG2 MoComp", &DXVA2_ModeMPEG2_MoComp,  0 },
 103    { "MPEG2 IDCT",   &DXVA2_ModeMPEG2_IDCT,    0 },
 104
 105    // Intel drivers return standard modes in addition to the Intel specific ones. Try the Intel specific first, they work better for Sandy Bridges.
 106    { "Intel H.264 VLD, no FGT",                                      &DXVADDI_Intel_ModeH264_E, AV_CODEC_ID_H264 },
 107    { "Intel H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVADDI_Intel_ModeH264_C, 0 },
 108    { "Intel H.264 motion compensation (MoComp), no FGT",             &DXVADDI_Intel_ModeH264_A, 0 },
 109    { "Intel VC-1 VLD",                                               &DXVADDI_Intel_ModeVC1_E,  0 },
 110
 111    { "H.264 variable-length decoder (VLD), FGT",               &DXVA2_ModeH264_F, AV_CODEC_ID_H264 },
 112    { "H.264 VLD, no FGT",                                      &DXVA2_ModeH264_E, AV_CODEC_ID_H264 },
 113    { "H.264 IDCT, FGT",                                        &DXVA2_ModeH264_D, 0,            },
 114    { "H.264 inverse discrete cosine transform (IDCT), no FGT", &DXVA2_ModeH264_C, 0,            },
 115    { "H.264 MoComp, FGT",                                      &DXVA2_ModeH264_B, 0,            },
 116    { "H.264 motion compensation (MoComp), no FGT",             &DXVA2_ModeH264_A, 0,            },
 117
 118    { "Windows Media Video 8 MoComp",           &DXVA2_ModeWMV8_B, 0 },
 119    { "Windows Media Video 8 post processing",  &DXVA2_ModeWMV8_A, 0 },
 120
 121    { "Windows Media Video 9 IDCT",             &DXVA2_ModeWMV9_C, 0 },
 122    { "Windows Media Video 9 MoComp",           &DXVA2_ModeWMV9_B, 0 },
 123    { "Windows Media Video 9 post processing",  &DXVA2_ModeWMV9_A, 0 },
 124
 125    { "VC-1 VLD",             &DXVA2_ModeVC1_D,    AV_CODEC_ID_VC1 },
 126    { "VC-1 VLD",             &DXVA2_ModeVC1_D,    AV_CODEC_ID_WMV3 },
 127    { "VC-1 VLD 2010",        &DXVA_ModeVC1_D2010, AV_CODEC_ID_VC1 },
 128    { "VC-1 VLD 2010",        &DXVA_ModeVC1_D2010, AV_CODEC_ID_WMV3 },
 129    { "VC-1 IDCT",            &DXVA2_ModeVC1_C,    0 },
 130    { "VC-1 MoComp",          &DXVA2_ModeVC1_B,    0 },
 131    { "VC-1 post processing", &DXVA2_ModeVC1_A,    0 },
 132
 133    { NULL, NULL, 0 }
 134};
 135
 136DEFINE_GUID(DXVA2_VideoProcATIVectorAdaptiveDevice,   0x3C5323C1,0x6fb7,0x44f5,0x90,0x81,0x05,0x6b,0xf2,0xee,0x44,0x9d);
 137DEFINE_GUID(DXVA2_VideoProcATIMotionAdaptiveDevice,   0x552C0DAD,0xccbc,0x420b,0x83,0xc8,0x74,0x94,0x3c,0xf9,0xf1,0xa6);
 138DEFINE_GUID(DXVA2_VideoProcATIAdaptiveDevice,         0x6E8329FF,0xb642,0x418b,0xbc,0xf0,0xbc,0xb6,0x59,0x1e,0x25,0x5f);
 139DEFINE_GUID(DXVA2_VideoProcNVidiaAdaptiveDevice,      0x6CB69578,0x7617,0x4637,0x91,0xE5,0x1C,0x02,0xDB,0x81,0x02,0x85);
 140DEFINE_GUID(DXVA2_VideoProcIntelEdgeDevice,           0xBF752EF6,0x8CC4,0x457A,0xBE,0x1B,0x08,0xBD,0x1C,0xAE,0xEE,0x9F);
 141DEFINE_GUID(DXVA2_VideoProcNVidiaUnknownDevice,       0xF9F19DA5,0x3B09,0x4B2F,0x9D,0x89,0xC6,0x47,0x53,0xE3,0xEA,0xAB);
 142
 143typedef struct {
 144    const char   *name;
 145    const GUID   *guid;
 146} dxva2_device_t;
 147
 148static const dxva2_device_t dxva2_devices[] = {
 149  { "Progressive Device",           &DXVA2_VideoProcProgressiveDevice         },
 150  { "Bob Device",                   &DXVA2_VideoProcBobDevice                 },
 151  { "Vector Adaptative Device",     &DXVA2_VideoProcATIVectorAdaptiveDevice   },
 152  { "Motion Adaptative Device",     &DXVA2_VideoProcATIMotionAdaptiveDevice   },
 153  { "Adaptative Device",            &DXVA2_VideoProcATIAdaptiveDevice         },
 154  { "Spatial-temporal device",      &DXVA2_VideoProcNVidiaAdaptiveDevice      },
 155  { "Edge directed device",         &DXVA2_VideoProcIntelEdgeDevice           },
 156  { "Unknown device (nVidia)",      &DXVA2_VideoProcNVidiaUnknownDevice       },
 157  { NULL, NULL }
 158};
 159
 160typedef struct {
 161    const char   *name;
 162    unsigned      flags;
 163} dxva2_deinterlacetech_t;
 164
 165static const dxva2_deinterlacetech_t dxva2_deinterlacetechs[] = {
 166  { "Inverse Telecine",                   DXVA2_DeinterlaceTech_InverseTelecine        },
 167  { "Motion vector steered",              DXVA2_DeinterlaceTech_MotionVectorSteered    },
 168  { "Pixel adaptive",                     DXVA2_DeinterlaceTech_PixelAdaptive          },
 169  { "Field adaptive",                     DXVA2_DeinterlaceTech_FieldAdaptive          },
 170  { "Edge filtering",                     DXVA2_DeinterlaceTech_EdgeFiltering          },
 171  { "Median filtering",                   DXVA2_DeinterlaceTech_MedianFiltering        },
 172  { "Bob vertical stretch 4-tap",         DXVA2_DeinterlaceTech_BOBVerticalStretch4Tap },
 173  { "Bob vertical stretch",               DXVA2_DeinterlaceTech_BOBVerticalStretch     },
 174  { "Bob line replicate",                 DXVA2_DeinterlaceTech_BOBLineReplicate       },
 175  { "Unknown",                            DXVA2_DeinterlaceTech_Unknown                },
 176  { NULL, 0 }
 177};
 178
 179
 180// Prefered targets must be first
 181static const D3DFORMAT render_targets[] = {
 182    (D3DFORMAT)MAKEFOURCC('N','V','1','2'),
 183    (D3DFORMAT)MAKEFOURCC('Y','V','1','2'),
 184    D3DFMT_UNKNOWN
 185};
 186
 187// List of PCI Device ID of ATI cards with UVD or UVD+ decoding block.
 188static DWORD UVDDeviceID [] = {
 189  0x95C0, // ATI Radeon HD 3400 Series (and others)
 190  0x95C5, // ATI Radeon HD 3400 Series (and others)
 191  0x95C4, // ATI Radeon HD 3400 Series (and others)
 192  0x94C3, // ATI Radeon HD 3410
 193  0x9589, // ATI Radeon HD 3600 Series (and others)
 194  0x9598, // ATI Radeon HD 3600 Series (and others)
 195  0x9591, // ATI Radeon HD 3600 Series (and others)
 196  0x9501, // ATI Radeon HD 3800 Series (and others)
 197  0x9505, // ATI Radeon HD 3800 Series (and others)
 198  0x9507, // ATI Radeon HD 3830
 199  0x9513, // ATI Radeon HD 3850 X2
 200  0x950F, // ATI Radeon HD 3850 X2
 201  0x0000
 202};
 203
 204// List of PCI Device ID of nVidia cards with the macroblock width issue. More or less the VP3 block.
 205// Per NVIDIA Accelerated Linux Graphics Driver, Appendix A Supported NVIDIA GPU Products, cards with note 1.
 206static DWORD VP3DeviceID [] = {
 207  0x06E0, // GeForce 9300 GE
 208  0x06E1, // GeForce 9300 GS
 209  0x06E2, // GeForce 8400
 210  0x06E4, // GeForce 8400 GS
 211  0x06E5, // GeForce 9300M GS
 212  0x06E6, // GeForce G100
 213  0x06E8, // GeForce 9200M GS
 214  0x06E9, // GeForce 9300M GS
 215  0x06EC, // GeForce G 105M
 216  0x06EF, // GeForce G 103M
 217  0x06F1, // GeForce G105M
 218  0x0844, // GeForce 9100M G
 219  0x0845, // GeForce 8200M G
 220  0x0846, // GeForce 9200
 221  0x0847, // GeForce 9100
 222  0x0848, // GeForce 8300
 223  0x0849, // GeForce 8200
 224  0x084A, // nForce 730a
 225  0x084B, // GeForce 9200
 226  0x084C, // nForce 980a/780a SLI
 227  0x084D, // nForce 750a SLI
 228  0x0860, // GeForce 9400
 229  0x0861, // GeForce 9400
 230  0x0862, // GeForce 9400M G
 231  0x0863, // GeForce 9400M
 232  0x0864, // GeForce 9300
 233  0x0865, // ION
 234  0x0866, // GeForce 9400M G
 235  0x0867, // GeForce 9400
 236  0x0868, // nForce 760i SLI
 237  0x086A, // GeForce 9400
 238  0x086C, // GeForce 9300 / nForce 730i
 239  0x086D, // GeForce 9200
 240  0x086E, // GeForce 9100M G
 241  0x086F, // GeForce 8200M G
 242  0x0870, // GeForce 9400M
 243  0x0871, // GeForce 9200
 244  0x0872, // GeForce G102M
 245  0x0873, // GeForce G102M
 246  0x0874, // ION
 247  0x0876, // ION
 248  0x087A, // GeForce 9400
 249  0x087D, // ION
 250  0x087E, // ION LE
 251  0x087F, // ION LE
 252  0x0000
 253};
 254
 255typedef struct {
 256    DWORD VendorID;
 257    DWORD DeviceID;
 258} pci_device;
 259
 260// List of devices that drop frames with a deinterlacing processor for progressive material.
 261static const pci_device NoDeintProcForProgDevices[] = {
 262  { PCIV_nVidia, 0x0865 }, // ION
 263  { PCIV_nVidia, 0x0874 }, // ION
 264  { PCIV_nVidia, 0x0876 }, // ION
 265  { PCIV_nVidia, 0x087D }, // ION
 266  { PCIV_nVidia, 0x087E }, // ION LE
 267  { PCIV_nVidia, 0x087F }, // ION LE
 268  { 0          , 0x0000 }
 269};
 270
 271static CStdString GUIDToString(const GUID& guid)
 272{
 273  CStdString buffer;
 274  buffer.Format("%08X-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x"
 275              , guid.Data1, guid.Data2, guid.Data3
 276              , guid.Data4[0], guid.Data4[1]
 277              , guid.Data4[2], guid.Data4[3], guid.Data4[4]
 278              , guid.Data4[5], guid.Data4[6], guid.Data4[7]);
 279  return buffer;
 280}
 281
 282static const dxva2_mode_t *dxva2_find_mode(const GUID *guid)
 283{
 284    for (unsigned i = 0; dxva2_modes[i].name; i++) {
 285        if (IsEqualGUID(*dxva2_modes[i].guid, *guid))
 286            return &dxva2_modes[i];
 287    }
 288    return NULL;
 289}
 290
 291static const dxva2_device_t *dxva2_find_device(const GUID *guid)
 292{
 293    for (unsigned i = 0; dxva2_devices[i].name; i++) {
 294        if (IsEqualGUID(*dxva2_devices[i].guid, *guid))
 295            return &dxva2_devices[i];
 296    }
 297    return NULL;
 298}
 299
 300static const dxva2_deinterlacetech_t *dxva2_find_deinterlacetech(unsigned flags)
 301{
 302    for (unsigned i = 0; dxva2_deinterlacetechs[i].name; i++) {
 303        if (dxva2_deinterlacetechs[i].flags == flags)
 304            return &dxva2_deinterlacetechs[i];
 305    }
 306    return NULL;
 307}
 308
 // Declares a boost::shared_ptr named <var>_holder that passes `var` to
 // CoTaskMemFree when the holder goes out of scope. The expansion already
 // ends with ';'.
 #define SCOPE(type, var) \
   boost::shared_ptr<type> var##_holder(var, CoTaskMemFree);
 310
 311CSurfaceContext::CSurfaceContext()
 312{
 313}
 314
 315CSurfaceContext::~CSurfaceContext()
 316{
 317  for (vector<IDirect3DSurface9*>::iterator it = m_heldsurfaces.begin(); it != m_heldsurfaces.end(); ++it)
 318    SAFE_RELEASE(*it);
 319}
 320
 321void CSurfaceContext::HoldSurface(IDirect3DSurface9* surface)
 322{
 323  surface->AddRef();
 324  m_heldsurfaces.push_back(surface);
 325}
 326
 327CDecoder::SVideoBuffer::SVideoBuffer()
 328{
 329  surface = NULL;
 330  Clear();
 331}
 332
 333CDecoder::SVideoBuffer::~SVideoBuffer()
 334{
 335  Clear();
 336}
 337
 338void CDecoder::SVideoBuffer::Clear()
 339{
 340  SAFE_RELEASE(surface);
 341  age     = 0;
 342  used    = 0;
 343}
 344
 345CDecoder::CDecoder()
 346 : m_event(true)
 347{
 348  m_event.Set();
 349  m_state     = DXVA_OPEN;
 350  m_service   = NULL;
 351  m_device    = NULL;
 352  m_decoder   = NULL;
 353  m_buffer_count = 0;
 354  m_buffer_age   = 0;
 355  m_refs         = 0;
 356  m_shared       = 0;
 357  m_surface_context = NULL;
 358  memset(&m_format, 0, sizeof(m_format));
 359  m_context          = (dxva_context*)calloc(1, sizeof(dxva_context));
 360  m_context->cfg     = (DXVA2_ConfigPictureDecode*)calloc(1, sizeof(DXVA2_ConfigPictureDecode));
 361  m_context->surface = (IDirect3DSurface9**)calloc(m_buffer_max, sizeof(IDirect3DSurface9*));
 362  g_Windowing.Register(this);
 363}
 364
 365CDecoder::~CDecoder()
 366{
 367  g_Windowing.Unregister(this);
 368  Close();
 369  free(m_context->surface);
 370  free(const_cast<DXVA2_ConfigPictureDecode*>(m_context->cfg)); // yes this is foobar
 371  free(m_context);
 372}
 373
 374void CDecoder::Close()
 375{
 376  CSingleLock lock(m_section);
 377  SAFE_RELEASE(m_decoder);
 378  SAFE_RELEASE(m_service);
 379  SAFE_RELEASE(m_surface_context);
 380  for(unsigned i = 0; i < m_buffer_count; i++)
 381    m_buffer[i].Clear();
 382  m_buffer_count = 0;
 383  memset(&m_format, 0, sizeof(m_format));
 384}
 385
 // Evaluates the HRESULT expression `a`. On failure it logs the expression
 // text, the source line and the error code, and returns false from the
 // enclosing function. The expansion already ends with ';', so call sites may
 // leave the semicolon out.
 #define CHECK(a) \
 do { \
   const HRESULT check_result = (a); \
   if(FAILED(check_result)) \
   { \
     CLog::Log(LOGERROR, "DXVA - failed executing "#a" at line %d with error %x", __LINE__, check_result); \
     return false; \
   } \
 } while(0);
 395
 396static bool CheckH264L41(AVCodecContext *avctx)
 397{
 398    unsigned widthmbs  = (avctx->coded_width + 15) / 16;  // width in macroblocks
 399    unsigned heightmbs = (avctx->coded_height + 15) / 16; // height in macroblocks
 400    unsigned maxdpbmbs = 32768;                     // Decoded Picture Buffer (DPB) capacity in macroblocks for L4.1
 401
 402    return (avctx->refs * widthmbs * heightmbs <= maxdpbmbs);
 403}
 404
 405static bool IsL41LimitedATI()
 406{
 407  D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
 408
 409  if(AIdentifier.VendorId == PCIV_ATI)
 410  {
 411    for (unsigned idx = 0; UVDDeviceID[idx] != 0; idx++)
 412    {
 413      if (UVDDeviceID[idx] == AIdentifier.DeviceId)
 414        return true;
 415    }
 416  }
 417  return false;
 418}
 419
 420static bool HasVP3WidthBug(AVCodecContext *avctx)
 421{
 422  // Some nVidia VP3 hardware cannot do certain macroblock widths
 423
 424  D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
 425
 426  if(AIdentifier.VendorId == PCIV_nVidia
 427  && !CDVDCodecUtils::IsVP3CompatibleWidth(avctx->coded_width))
 428  {
 429    // Find the card in a known list of problematic VP3 hardware
 430    for (unsigned idx = 0; VP3DeviceID[idx] != 0; idx++)
 431      if (VP3DeviceID[idx] == AIdentifier.DeviceId)
 432        return true;
 433  }
 434  return false;
 435}
 436
 437static bool CheckCompatibility(AVCodecContext *avctx)
 438{
 439  // The incompatibilities are all for H264
 440  if(avctx->codec_id != AV_CODEC_ID_H264)
 441    return true;
 442
 443  // Macroblock width incompatibility
 444  if (HasVP3WidthBug(avctx))
 445  {
 446    CLog::Log(LOGWARNING,"DXVA - width %i is not supported with nVidia VP3 hardware. DXVA will not be used", avctx->coded_width);
 447    return false;
 448  }
 449
 450
 451  // Check for hardware limited to H264 L4.1 (ie Bluray).
 452
 453  // No advanced settings: autodetect.
 454  // The advanced setting lets the user override the autodetection (in case of false positive or negative)
 455
 456  bool checkcompat;
 457  if (!g_advancedSettings.m_DXVACheckCompatibilityPresent)
 458    checkcompat = IsL41LimitedATI();  // ATI UVD and UVD+ cards can only do L4.1 - corresponds roughly to series 3xxx
 459  else
 460    checkcompat = g_advancedSettings.m_DXVACheckCompatibility;
 461
 462  if (checkcompat && !CheckH264L41(avctx))
 463  {
 464      CLog::Log(LOGWARNING, "DXVA - compatibility check: video exceeds L4.1. DXVA will not be used.");
 465      return false;
 466  }
 467
 468  return true;
 469}
 470
 471bool CDecoder::Open(AVCodecContext *avctx, enum PixelFormat fmt, unsigned int surfaces)
 472{
 473  if (!CheckCompatibility(avctx))
 474    return false;
 475
 476  if(!LoadDXVA())
 477    return false;
 478
 479  CSingleLock lock(m_section);
 480  Close();
 481
 482  if(m_state == DXVA_LOST)
 483  {
 484    CLog::Log(LOGDEBUG, "DXVA - device is in lost state, we can't start");
 485    return false;
 486  }
 487
 488  CHECK(g_DXVA2CreateVideoService(g_Windowing.Get3DDevice(), IID_IDirectXVideoDecoderService, (void**)&m_service))
 489
 490  UINT  input_count;
 491  GUID *input_list;
 492
 493  CHECK(m_service->GetDecoderDeviceGuids(&input_count, &input_list))
 494  SCOPE(GUID, input_list);
 495
 496  for(unsigned i = 0; i < input_count; i++)
 497  {
 498    const GUID *g            = &input_list[i];
 499    const dxva2_mode_t *mode = dxva2_find_mode(g);
 500    if(mode)
 501      CLog::Log(LOGDEBUG, "DXVA - supports '%s'", mode->name);
 502    else
 503      CLog::Log(LOGDEBUG, "DXVA - supports %s", GUIDToString(*g).c_str());
 504  }
 505
 506  m_format.Format = D3DFMT_UNKNOWN;
 507  for(const dxva2_mode_t* mode = dxva2_modes; mode->name && m_format.Format == D3DFMT_UNKNOWN; mode++)
 508  {
 509    if(mode->codec != avctx->codec_id)
 510      continue;
 511
 512    for(unsigned j = 0; j < input_count; j++)
 513    {
 514      if(!IsEqualGUID(input_list[j], *mode->guid))
 515        continue;
 516
 517      CLog::Log(LOGDEBUG, "DXVA - trying '%s'", mode->name);
 518      if(OpenTarget(input_list[j]))
 519        break;
 520    }
 521  }
 522
 523  if(m_format.Format == D3DFMT_UNKNOWN)
 524  {
 525    CLog::Log(LOGDEBUG, "DXVA - unable to find an input/output format combination");
 526    return false;
 527  }
 528
 529  m_format.SampleWidth  = avctx->coded_width;
 530  m_format.SampleHeight = avctx->coded_height;
 531  m_format.SampleFormat.SampleFormat           = DXVA2_SampleProgressiveFrame;
 532  m_format.SampleFormat.VideoLighting          = DXVA2_VideoLighting_dim;
 533
 534  if     (avctx->color_range == AVCOL_RANGE_JPEG)
 535    m_format.SampleFormat.NominalRange = DXVA2_NominalRange_0_255;
 536  else if(avctx->color_range == AVCOL_RANGE_MPEG)
 537    m_format.SampleFormat.NominalRange = DXVA2_NominalRange_16_235;
 538  else
 539    m_format.SampleFormat.NominalRange = DXVA2_NominalRange_Unknown;
 540
 541  switch(avctx->chroma_sample_location)
 542  {
 543    case AVCHROMA_LOC_LEFT:
 544      m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited 
 545                                                   | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
 546      break;
 547    case AVCHROMA_LOC_CENTER:
 548      m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
 549      break;
 550    case AVCHROMA_LOC_TOPLEFT:
 551      m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited 
 552                                                   | DXVA2_VideoChromaSubsampling_Vertically_Cosited;
 553      break;
 554    default:
 555      m_format.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Unknown;      
 556  }
 557
 558  switch(avctx->colorspace)
 559  {
 560    case AVCOL_SPC_BT709:
 561      m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT709;
 562      break;
 563    case AVCOL_SPC_BT470BG:
 564    case AVCOL_SPC_SMPTE170M:
 565      m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT601;
 566      break;
 567    case AVCOL_SPC_SMPTE240M:
 568      m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_SMPTE240M;
 569      break;
 570    case AVCOL_SPC_FCC:
 571    case AVCOL_SPC_UNSPECIFIED:
 572    case AVCOL_SPC_RGB:
 573    default:
 574      m_format.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_Unknown;
 575  }
 576
 577  switch(avctx->color_primaries)
 578  {
 579    case AVCOL_PRI_BT709:
 580      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT709;
 581      break;
 582    case AVCOL_PRI_BT470M:
 583      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysM;
 584      break;
 585    case AVCOL_PRI_BT470BG:
 586      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysBG;
 587      break;
 588    case AVCOL_PRI_SMPTE170M:
 589      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE170M;
 590      break;
 591    case AVCOL_PRI_SMPTE240M:
 592      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE240M;
 593      break;
 594    case AVCOL_PRI_FILM:
 595    case AVCOL_PRI_UNSPECIFIED:
 596    default:
 597      m_format.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_Unknown;
 598  }
 599
 600  switch(avctx->color_trc)
 601  {
 602    case AVCOL_TRC_BT709:
 603      m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_709;
 604      break;
 605    case AVCOL_TRC_GAMMA22:
 606      m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_22;
 607      break;
 608    case AVCOL_TRC_GAMMA28:
 609      m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_28;
 610      break;
 611    default:
 612      m_format.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_Unknown;
 613  }
 614
 615  if (avctx->time_base.den > 0 && avctx->time_base.num > 0)
 616  {
 617    m_format.InputSampleFreq.Numerator   = avctx->time_base.num;
 618    m_format.InputSampleFreq.Denominator = avctx->time_base.den;
 619  } 
 620  m_format.OutputFrameFreq = m_format.InputSampleFreq;
 621  m_format.UABProtectionLevel = FALSE;
 622  m_format.Reserved = 0;
 623
 624  if (surfaces > m_shared)
 625    m_shared = surfaces;
 626
 627  if(avctx->refs > m_refs)
 628    m_refs = avctx->refs;
 629
 630  if(m_refs == 0)
 631  {
 632    if(avctx->codec_id == AV_CODEC_ID_H264)
 633      m_refs = 16;
 634    else
 635      m_refs = 2;
 636  }
 637  CLog::Log(LOGDEBUG, "DXVA - source requires %d references", avctx->refs);
 638
 639  // find what decode configs are available
 640  UINT                       cfg_count = 0;
 641  DXVA2_ConfigPictureDecode *cfg_list  = NULL;
 642  CHECK(m_service->GetDecoderConfigurations(m_input
 643                                          , &m_format
 644                                          , NULL
 645                                          , &cfg_count
 646                                          , &cfg_list))
 647  SCOPE(DXVA2_ConfigPictureDecode, cfg_list);
 648
 649  DXVA2_ConfigPictureDecode config = {};
 650
 651  unsigned bitstream = 2; // ConfigBitstreamRaw = 2 is required for Poulsbo and handles skipping better with nVidia
 652  for(unsigned i = 0; i< cfg_count; i++)
 653  {
 654    CLog::Log(LOGDEBUG,
 655              "DXVA - config %d: bitstream type %d%s",
 656              i,
 657              cfg_list[i].ConfigBitstreamRaw,
 658              IsEqualGUID(cfg_list[i].guidConfigBitstreamEncryption, DXVA_NoEncrypt) ? "" : ", encrypted");
 659
 660    // select first available
 661    if(config.ConfigBitstreamRaw == 0 && cfg_list[i].ConfigBitstreamRaw != 0)
 662      config = cfg_list[i];
 663
 664    // overide with preferred if found
 665    if(config.ConfigBitstreamRaw != bitstream && cfg_list[i].ConfigBitstreamRaw == bitstream)
 666      config = cfg_list[i];
 667  }
 668
 669  if(!config.ConfigBitstreamRaw)
 670  {
 671    CLog::Log(LOGDEBUG, "DXVA - failed to find a raw input bitstream");
 672    return false;
 673  }
 674  *const_cast<DXVA2_ConfigPictureDecode*>(m_context->cfg) = config;
 675
 676  m_surface_context = new CSurfaceContext();
 677
 678  if(!OpenDecoder())
 679    return false;
 680
 681  avctx->get_buffer      = GetBufferS;
 682  avctx->release_buffer  = RelBufferS;
 683  avctx->hwaccel_context = m_context;
 684
 685  if (IsL41LimitedATI())
 686  {
 687#ifdef FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG
 688    m_context->workaround |= FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG;
 689#else
 690    CLog::Log(LOGWARNING, "DXVA - video card with different scaling list zigzag order detected, but no support in libavcodec");
 691#endif
 692  }
 693
 694  m_state = DXVA_OPEN;
 695  return true;
 696}
 697
 698int CDecoder::Decode(AVCodecContext* avctx, AVFrame* frame)
 699{
 700  CSingleLock lock(m_section);
 701  int result = Check(avctx);
 702  if(result)
 703    return result;
 704
 705  if(frame)
 706  {
 707    for(unsigned i = 0; i < m_buffer_count; i++)
 708    {
 709      if(m_buffer[i].surface == (IDirect3DSurface9*)frame->data[3])
 710        return VC_BUFFER | VC_PICTURE;
 711    }
 712    CLog::Log(LOGWARNING, "DXVA - ignoring invalid surface");
 713    return VC_BUFFER;
 714  }
 715  else
 716    return 0;
 717}
 718
 719bool CDecoder::GetPicture(AVCodecContext* avctx, AVFrame* frame, DVDVideoPicture* picture)
 720{
 721  ((CDVDVideoCodecFFmpeg*)avctx->opaque)->GetPictureCommon(picture);
 722  CSingleLock lock(m_section);
 723  picture->format = RENDER_FMT_DXVA;
 724  picture->extended_format = (unsigned int)m_format.Format;
 725  picture->context = m_surface_context;
 726  picture->data[3]= frame->data[3];
 727  return true;
 728}
 729
 730int CDecoder::Check(AVCodecContext* avctx)
 731{
 732  CSingleLock lock(m_section);
 733
 734  if(m_state == DXVA_RESET)
 735    Close();
 736
 737  if(m_state == DXVA_LOST)
 738  {
 739    Close();
 740    lock.Leave();
 741    m_event.WaitMSec(2000);
 742    lock.Enter();
 743    if(m_state == DXVA_LOST)
 744    {
 745      CLog::Log(LOGERROR, "CDecoder::Check - device didn't reset in reasonable time");
 746      return VC_ERROR;
 747    }
 748  }
 749
 750  if(m_format.SampleWidth  == 0
 751  || m_format.SampleHeight == 0)
 752  {
 753    if(!Open(avctx, avctx->pix_fmt, m_shared))
 754    {
 755      CLog::Log(LOGERROR, "CDecoder::Check - decoder was not able to reset");
 756      Close();
 757      return VC_ERROR;
 758    }
 759    return VC_FLUSHED;
 760  }
 761  else
 762  {
 763    if(avctx->refs > m_refs)
 764    {
 765      CLog::Log(LOGWARNING, "CDecoder::Check - number of required reference frames increased, recreating decoder");
 766#if ALLOW_ADDING_SURFACES
 767      if(!OpenDecoder())
 768        return VC_ERROR;
 769#else
 770      Close();
 771      return VC_FLUSHED;
 772#endif
 773    }
 774  }
 775
 776  // Status reports are available only for the DXVA2_ModeH264 and DXVA2_ModeVC1 modes
 777  if(avctx->codec_id != AV_CODEC_ID_H264
 778  && avctx->codec_id != AV_CODEC_ID_VC1
 779  && avctx->codec_id != AV_CODEC_ID_WMV3)
 780    return 0;
 781
 782  DXVA2_DecodeExecuteParams params = {};
 783  DXVA2_DecodeExtensionData data   = {};
 784  union {
 785    DXVA_Status_H264 h264;
 786    DXVA_Status_VC1  vc1;
 787  } status = {};
 788
 789  params.pExtensionData = &data;
 790  data.Function = DXVA_STATUS_REPORTING_FUNCTION;
 791  data.pPrivateOutputData    = &status;
 792  data.PrivateOutputDataSize = avctx->codec_id == AV_CODEC_ID_H264 ? sizeof(DXVA_Status_H264) : sizeof(DXVA_Status_VC1);
 793  HRESULT hr;
 794  if(FAILED( hr = m_decoder->Execute(&params)))
 795  {
 796    CLog::Log(LOGWARNING, "DXVA - failed to get decoder status - 0x%08X", hr);
 797    return VC_ERROR;
 798  }
 799
 800  if(avctx->codec_id == AV_CODEC_ID_H264)
 801  {
 802    if(status.h264.bStatus)
 803      CLog::Log(LOGWARNING, "DXVA - decoder problem of status %d with %d", status.h264.bStatus, status.h264.bBufType);
 804  }
 805  else
 806  {
 807    if(status.vc1.bStatus)
 808      CLog::Log(LOGWARNING, "DXVA - decoder problem of status %d with %d", status.vc1.bStatus, status.vc1.bBufType);
 809  }
 810  return 0;
 811}
 812
 813bool CDecoder::OpenTarget(const GUID &guid)
 814{
 815  UINT       output_count = 0;
 816  D3DFORMAT *output_list  = NULL;
 817  CHECK(m_service->GetDecoderRenderTargets(guid, &output_count, &output_list))
 818  SCOPE(D3DFORMAT, output_list);
 819
 820  for (unsigned i = 0; render_targets[i] != D3DFMT_UNKNOWN; i++)
 821      for(unsigned k = 0; k < output_count; k++)
 822          if (output_list[k] == render_targets[i])
 823          {
 824              m_input = guid;
 825              m_format.Format = output_list[k];
 826              return true;
 827          }
 828
 829  return false;
 830}
 831
 832bool CDecoder::OpenDecoder()
 833{
 834  SAFE_RELEASE(m_decoder);
 835  m_context->decoder = NULL;
 836
 837  m_context->surface_count = m_refs + 1 + 1 + m_shared; // refs + 1 decode + 1 libavcodec safety + processor buffer
 838
 839  if(m_context->surface_count > m_buffer_count)
 840  {
 841    CLog::Log(LOGDEBUG, "DXVA - allocating %d surfaces", m_context->surface_count - m_buffer_count);
 842
 843    CHECK(m_service->CreateSurface( (m_format.SampleWidth  + 15) & ~15
 844                                  , (m_format.SampleHeight + 15) & ~15
 845                                  , m_context->surface_count - 1 - m_buffer_count
 846                                  , m_format.Format
 847                                  , D3DPOOL_DEFAULT
 848                                  , 0
 849                                  , DXVA2_VideoDecoderRenderTarget
 850                                  , m_context->surface + m_buffer_count, NULL ));
 851
 852    for(unsigned i = m_buffer_count; i < m_context->surface_count; i++)
 853    {
 854      m_buffer[i].surface = m_context->surface[i];
 855      m_surface_context->HoldSurface(m_context->surface[i]);
 856    }
 857
 858    m_buffer_count = m_context->surface_count;
 859  }
 860
 861  CHECK(m_service->CreateVideoDecoder(m_input, &m_format
 862                                    , m_context->cfg
 863                                    , m_context->surface
 864                                    , m_context->surface_count
 865                                    , &m_decoder))
 866
 867  m_context->decoder = m_decoder;
 868
 869  return true;
 870}
 871
 872bool CDecoder::Supports(enum PixelFormat fmt)
 873{
 874  if(fmt == PIX_FMT_DXVA2_VLD)
 875    return true;
 876  return false;
 877}
 878
 879void CDecoder::RelBuffer(AVCodecContext *avctx, AVFrame *pic)
 880{
 881  CSingleLock lock(m_section);
 882  IDirect3DSurface9* surface = (IDirect3DSurface9*)pic->data[3];
 883
 884  for(unsigned i = 0; i < m_buffer_count; i++)
 885  {
 886    if(m_buffer[i].surface == surface)
 887    {
 888      m_buffer[i].used = false;
 889      m_buffer[i].age  = ++m_buffer_age;
 890      break;
 891    }
 892  }
 893  for(unsigned i = 0; i < 4; i++)
 894    pic->data[i] = NULL;
 895}
 896
 897int CDecoder::GetBuffer(AVCodecContext *avctx, AVFrame *pic)
 898{
 899  CSingleLock lock(m_section);
 900  if(avctx->coded_width  != m_format.SampleWidth
 901  || avctx->coded_height != m_format.SampleHeight)
 902  {
 903    Close();
 904    if(!Open(avctx, avctx->pix_fmt, m_shared))
 905    {
 906      Close();
 907      return -1;
 908    }
 909  }
 910
 911  int           count = 0;
 912  SVideoBuffer* buf   = NULL;
 913  for(unsigned i = 0; i < m_buffer_count; i++)
 914  {
 915    if(m_buffer[i].used)
 916      count++;
 917    else
 918    {
 919      if(!buf || buf->age > m_buffer[i].age)
 920        buf = m_buffer+i;
 921    }
 922  }
 923
 924  if(count >= m_refs+2)
 925  {
 926    m_refs++;
 927#if ALLOW_ADDING_SURFACES
 928    if(!OpenDecoder())
 929      return -1;
 930    return GetBuffer(avctx, pic);
 931#else
 932    Close();
 933    return -1;
 934#endif
 935  }
 936
 937  if(!buf)
 938  {
 939    CLog::Log(LOGERROR, "DXVA - unable to find new unused buffer");
 940    return -1;
 941  }
 942
 943  pic->reordered_opaque = avctx->reordered_opaque;
 944  pic->type = FF_BUFFER_TYPE_USER;
 945
 946  for(unsigned i = 0; i < 4; i++)
 947  {
 948    pic->data[i] = NULL;
 949    pic->linesize[i] = 0;
 950  }
 951
 952  pic->data[0] = (uint8_t*)buf->surface;
 953  pic->data[3] = (uint8_t*)buf->surface;
 954  buf->used = true;
 955
 956  return 0;
 957}
 958
 959unsigned CDecoder::GetAllowedReferences()
 960{
 961  return m_shared;
 962}
 963
 964
 965//---------------------------------------------------------------------------
 966//---------------------------------------------------------------------------
 967//------------------------ PROCESSING SERVICE -------------------------------
 968//---------------------------------------------------------------------------
 969//---------------------------------------------------------------------------
 970
 971CProcessor::CProcessor()
 972{
 973  m_service = NULL;
 974  m_process = NULL;
 975  m_time    = 0;
 976  g_Windowing.Register(this);
 977
 978  m_surfaces = NULL;
 979  m_context = NULL;
 980  m_index = 0;
 981  m_progressive = true;
 982}
 983
 984CProcessor::~CProcessor()
 985{
 986  g_Windowing.Unregister(this);
 987  UnInit();
 988}
 989
 990void CProcessor::UnInit()
 991{
 992  CSingleLock lock(m_section);
 993  Close();
 994  SAFE_RELEASE(m_service);
 995}
 996
 997void CProcessor::Close()
 998{
 999  CSingleLock lock(m_section);
1000  SAFE_RELEASE(m_process);
1001  for(unsigned i = 0; i < m_sample.size(); i++)
1002  {
1003    SAFE_RELEASE(m_sample[i].context);
1004    SAFE_RELEASE(m_sample[i].sample.SrcSurface);
1005  }
1006  m_sample.clear();
1007
1008  SAFE_RELEASE(m_context);
1009  if (m_surfaces)
1010  {
1011    for (unsigned i = 0; i < m_size; i++)
1012      SAFE_RELEASE(m_surfaces[i]);
1013    free(m_surfaces);
1014    m_surfaces = NULL;
1015  }
1016}
1017
1018bool CProcessor::UpdateSize(const DXVA2_VideoDesc& dsc)
1019{
1020  // TODO: print the D3FORMAT text version in log
1021  CLog::Log(LOGDEBUG, "DXVA - cheking samples array size using %d render target", dsc.Format);
1022
1023  GUID* deint_guid_list = NULL;
1024  unsigned guid_count = 0;
1025  if (FAILED(m_service->GetVideoProcessorDeviceGuids(&dsc, &guid_count, &deint_guid_list)))
1026    return false;
1027
1028  SCOPE(GUID, deint_guid_list);
1029  
1030  for (unsigned i = 0; i < guid_count; i++)
1031  {
1032    DXVA2_VideoProcessorCaps caps;
1033    CHECK(m_service->GetVideoProcessorCaps(deint_guid_list[i], &dsc, D3DFMT_X8R8G8B8, &caps));
1034    if (caps.NumBackwardRefSamples + caps.NumForwardRefSamples > m_size)
1035    {
1036      m_size = caps.NumBackwardRefSamples + caps.NumForwardRefSamples;
1037      CLog::Log(LOGDEBUG, "DXVA - updated maximum samples count to %d", m_size);
1038    }
1039    m_max_back_refs = std::max(caps.NumBackwardRefSamples, m_max_back_refs);
1040    m_max_fwd_refs = std::max(caps.NumForwardRefSamples, m_max_fwd_refs);
1041  }
1042
1043  return true;
1044}
1045
1046bool CProcessor::PreInit()
1047{
1048  if (!LoadDXVA())
1049    return false;
1050
1051  UnInit();
1052
1053  CSingleLock lock(m_section);
1054
1055  if (FAILED(g_DXVA2CreateVideoService(g_Windowing.Get3DDevice(), IID_IDirectXVideoProcessorService, (void**)&m_service)))
1056    return false;
1057
1058  m_size = 0;
1059
1060  // We try to find the maximum count of reference frames using a standard resolution and all known render target formats
1061  DXVA2_VideoDesc dsc = {};
1062  dsc.SampleWidth = 640;
1063  dsc.SampleHeight = 480;
1064  dsc.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedOddFirst;
1065
1066  m_max_back_refs = 0;
1067  m_max_fwd_refs = 0;
1068
1069  for (unsigned i = 0; render_targets[i] != D3DFMT_UNKNOWN; i++)
1070  {
1071    dsc.Format = render_targets[i];
1072    if (!UpdateSize(dsc))
1073      CLog::Log(LOGDEBUG, "DXVA - render target not supported by processor");
1074  }
1075
1076  m_size = m_max_back_refs + 1 + m_max_fwd_refs + 2;  // refs + 1 display + 2 safety frames
1077
1078  return true;
1079}
1080
1081bool CProcessor::Open(UINT width, UINT height, unsigned int flags, unsigned int format, unsigned int extended_format)
1082{
1083  Close();
1084
1085  CSingleLock lock(m_section);
1086
1087  if (!m_service)
1088    return false;
1089
1090  DXVA2_VideoDesc dsc;
1091  memset(&dsc, 0, sizeof(DXVA2_VideoDesc));
1092
1093  dsc.SampleWidth = width;
1094  dsc.SampleHeight = height;
1095  dsc.SampleFormat.VideoLighting = DXVA2_VideoLighting_dim;
1096
1097  switch (CONF_FLAGS_CHROMA_MASK(flags))
1098  {
1099    case CONF_FLAGS_CHROMA_LEFT:
1100      dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
1101                                              | DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
1102      break;
1103    case CONF_FLAGS_CHROMA_CENTER:
1104      dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Vertically_AlignedChromaPlanes;
1105      break;
1106    case CONF_FLAGS_CHROMA_TOPLEFT:
1107      dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Horizontally_Cosited
1108                                              | DXVA2_VideoChromaSubsampling_Vertically_Cosited;
1109      break;
1110    default:
1111      dsc.SampleFormat.VideoChromaSubsampling = DXVA2_VideoChromaSubsampling_Unknown;
1112  }
1113
1114  if (flags & CONF_FLAGS_YUV_FULLRANGE)
1115    dsc.SampleFormat.NominalRange = DXVA2_NominalRange_0_255;
1116  else
1117    dsc.SampleFormat.NominalRange = DXVA2_NominalRange_16_235;
1118
1119  switch (CONF_FLAGS_YUVCOEF_MASK(flags))
1120  {
1121    case CONF_FLAGS_YUVCOEF_240M:
1122      dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_SMPTE240M;
1123      break;
1124    case CONF_FLAGS_YUVCOEF_BT601:
1125      dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT601;
1126      break;
1127    case CONF_FLAGS_YUVCOEF_BT709:
1128      dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_BT709;
1129      break;
1130    default:
1131      dsc.SampleFormat.VideoTransferMatrix = DXVA2_VideoTransferMatrix_Unknown;
1132  }
1133
1134  switch (CONF_FLAGS_COLPRI_MASK(flags))
1135  {
1136    case CONF_FLAGS_COLPRI_BT709:
1137      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT709;
1138      break;
1139    case CONF_FLAGS_COLPRI_BT470M:
1140      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysM;
1141      break;
1142    case CONF_FLAGS_COLPRI_BT470BG:
1143      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_BT470_2_SysBG;
1144      break;
1145    case CONF_FLAGS_COLPRI_170M:
1146      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE170M;
1147      break;
1148    case CONF_FLAGS_COLPRI_240M:
1149      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_SMPTE240M;
1150      break;
1151    default:
1152      dsc.SampleFormat.VideoPrimaries = DXVA2_VideoPrimaries_Unknown;
1153  }
1154
1155  switch (CONF_FLAGS_TRC_MASK(flags))
1156  {
1157    case CONF_FLAGS_TRC_BT709:
1158      dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_709;
1159      break;
1160    case CONF_FLAGS_TRC_GAMMA22:
1161      dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_22;
1162      break;
1163    case CONF_FLAGS_TRC_GAMMA28:
1164      dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_28;
1165      break;
1166    default:
1167      dsc.SampleFormat.VideoTransferFunction = DXVA2_VideoTransFunc_Unknown;
1168  }
1169
1170  m_desc = dsc;
1171
1172  if (format == RENDER_FMT_DXVA)
1173    m_desc.Format = (D3DFORMAT)extended_format;
1174  else
1175  {
1176    // Only NV12 software colorspace conversion is implemented for now
1177    m_desc.Format = (D3DFORMAT)MAKEFOURCC('N','V','1','2');
1178    if (!CreateSurfaces())
1179      return false;
1180  }
1181
1182  // frame flags are not available to do the complete calculation of the deinterlacing mode, as done in Render()
1183  // It's OK, as it doesn't make any difference for all hardware except the few GPUs on the quirk list.
1184  // And for those GPUs, the correct values will be calculated with the first Render() and the correct processor
1185  // will replace the one allocated here, before the user sees anything.
1186  // It's a bit inefficient, that's all.
1187  m_deinterlace_mode = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
1188  m_interlace_method = g_renderManager.AutoInterlaceMethod(CMediaSettings::Get().GetCurrentVideoSettings().m_InterlaceMethod);;
1189
1190  EvaluateQuirkNoDeintProcForProg();
1191
1192  if (g_advancedSettings.m_DXVANoDeintProcForProgressive || m_quirk_nodeintprocforprog)
1193    CLog::Log(LOGNOTICE, "DXVA: Auto deinterlacing mode workaround activated. Deinterlacing processor will be used only for interlaced frames.");
1194
1195  if (!OpenProcessor())
1196    return false;
1197
1198  m_time = 0;
1199
1200  return true;
1201}
1202
1203void CProcessor::EvaluateQuirkNoDeintProcForProg()
1204{
1205  D3DADAPTER_IDENTIFIER9 AIdentifier = g_Windowing.GetAIdentifier();
1206
1207  for (unsigned idx = 0; NoDeintProcForProgDevices[idx].VendorID != 0; idx++)
1208  {
1209    if(NoDeintProcForProgDevices[idx].VendorID == AIdentifier.VendorId
1210    && NoDeintProcForProgDevices[idx].DeviceID == AIdentifier.DeviceId)
1211    {
1212      m_quirk_nodeintprocforprog = true;
1213      return;
1214    }
1215  }
1216  m_quirk_nodeintprocforprog = false;
1217}
1218
1219bool CProcessor::SelectProcessor()
1220{
1221  // The CProcessor can be run after dxva or software decoding, possibly after software deinterlacing.
1222
1223  // Deinterlace mode off: force progressive
1224  // Deinterlace mode auto or force, with a dxva deinterlacing method: create an deinterlacing capable processor. The frame flags will tell it to deinterlace or not.
1225  m_progressive = m_deinterlace_mode == VS_DEINTERLACEMODE_OFF
1226                  || (   m_interlace_method != VS_INTERLACEMETHOD_DXVA_BOB
1227                      && m_interlace_method != VS_INTERLACEMETHOD_DXVA_BEST);
1228
1229  if (m_progressive)
1230    m_desc.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
1231  else
1232    m_desc.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedEvenFirst;
1233
1234  GUID*    guid_list;
1235  unsigned guid_count;
1236  CHECK(m_service->GetVideoProcessorDeviceGuids(&m_desc, &guid_count, &guid_list));
1237  SCOPE(GUID, guid_list);
1238
1239  if(guid_count == 0)
1240  {
1241    CLog::Log(LOGDEBUG, "DXVA - unable to find any processors");
1242    return false;
1243  }
1244
1245  for(unsigned i = 0; i < guid_count; i++)
1246  {
1247    const GUID* g = &guid_list[i];
1248    const dxva2_device_t* device = dxva2_find_device(g);
1249
1250    if (device)
1251    {
1252      CLog::Log(LOGDEBUG, "DXVA - processor found %s", device->name);
1253    }
1254    else
1255    {
1256      CHECK(m_service->GetVideoProcessorCaps(*g, &m_desc, D3DFMT_X8R8G8B8, &m_caps));
1257      const dxva2_deinterlacetech_t* tech = dxva2_find_deinterlacetech(m_caps.DeinterlaceTechnology);
1258      if (tech != NULL)
1259        CLog::Log(LOGDEBUG, "DXVA - unknown processor %s found, deinterlace technology %s", GUIDToString(*g).c_str(), tech->name);
1260      else
1261        CLog::Log(LOGDEBUG, "DXVA - unknown processor %s found, unknown technology", GUIDToString(*g).c_str());
1262    }
1263  }
1264
1265  if (m_progressive)
1266    m_device = DXVA2_VideoProcProgressiveDevice;
1267  else if(m_interlace_method == VS_INTERLACEMETHOD_DXVA_BEST)
1268    m_device = guid_list[0];
1269  else
1270    m_device = DXVA2_VideoProcBobDevice;
1271
1272  return true;
1273}
1274
1275bool CProcessor::OpenProcessor()
1276{
1277  if (!SelectProcessor())
1278    return false;
1279
1280  SAFE_RELEASE(m_process);
1281
1282  const dxva2_device_t* device = dxva2_find_device(&m_device);
1283  if (device)
1284    CLog::Log(LOGDEBUG, "DXVA - processor selected %s", device->name);
1285  else
1286    CLog::Log(LOGDEBUG, "DXVA - processor selected %s", GUIDToString(m_device).c_str());
1287
1288  D3DFORMAT rtFormat = D3DFMT_X8R8G8B8;
1289  CHECK(m_service->GetVideoProcessorCaps(m_device, &m_desc, rtFormat, &m_caps))
1290
1291  /* HACK for Intel Egde Device. 
1292   * won't work if backward refs is equals value from the capabilities *
1293   * Possible reasons are:                                             *
1294   * 1) The device capabilities are incorrectly reported               *
1295   * 2) The device is broken                                           */
1296  if (IsEqualGUID(m_device, DXVA2_VideoProcIntelEdgeDevice))
1297    m_caps.NumBackwardRefSamples = 0;
1298
1299  if (m_caps.DeviceCaps & DXVA2_VPDev_SoftwareDevice)
1300    CLog::Log(LOGDEBUG, "DXVA - processor is software device");
1301
1302  if (m_caps.DeviceCaps & DXVA2_VPDev_EmulatedDXVA1)
1303    CLog::Log(LOGDEBUG, "DXVA - processor is emulated dxva1");
1304
1305  CLog::Log(LOGDEBUG, "DXVA - processor requires %d past frames and %d future frames", m_caps.NumBackwardRefSamples, m_caps.NumForwardRefSamples);
1306
1307  if (m_caps.NumBackwardRefSamples + m_caps.NumForwardRefSamples + 3 > m_size)
1308  {
1309    CLog::Log(LOGERROR, "DXVA - used an incorrect number of reference frames creating processor");
1310    return false;
1311  }
1312
1313  CHECK(m_service->CreateVideoProcessor(m_device, &m_desc, rtFormat, 0, &m_process));
1314
1315  CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Brightness, &m_brightness));
1316  CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Contrast  , &m_contrast));
1317  CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Hue       , &m_hue));
1318  CHECK(m_service->GetProcAmpRange(m_device, &m_desc, rtFormat, DXVA2_ProcAmp_Saturation, &m_saturation));
1319
1320  return true;
1321}
1322
1323bool CProcessor::CreateSurfaces()
1324{
1325  LPDIRECT3DDEVICE9 pD3DDevice = g_Windowing.Get3DDevice();
1326  m_surfaces = (LPDIRECT3DSURFACE9*)calloc(m_size, sizeof(LPDIRECT3DSURFACE9));
1327  for (unsigned idx = 0; idx < m_size; idx++)
1328    CHECK(pD3DDevice->CreateOffscreenPlainSurface(
1329                                (m_desc.SampleWidth + 15) & ~15,
1330                                (m_desc.SampleHeight + 15) & ~15,
1331                                m_desc.Format,
1332                                D3DPOOL_DEFAULT,
1333                                &m_surfaces[idx],
1334                                NULL));
1335
1336  m_context = new CSurfaceContext();
1337
1338  return true;
1339}
1340
1341REFERENCE_TIME CProcessor::Add(DVDVideoPicture* picture)
1342{
1343  CSingleLock lock(m_section);
1344
1345  IDirect3DSurface9* surface = NULL;
1346  CSurfaceContext* context = NULL;
1347
1348  if (picture->iFlags & DVP_FLAG_DROPPED)
1349    return 0;
1350
1351  switch (picture->format)
1352  {
1353    case RENDER_FMT_DXVA:
1354    {
1355      surface = (IDirect3DSurface9*)picture->data[3];
1356      context = picture->context;
1357      break;
1358    }
1359
1360    case RENDER_FMT_YUV420P:
1361    {
1362      surface = m_surfaces[m_index];
1363      m_index = (m_index + 1) % m_size;
1364
1365      context = m_context;
1366  
1367      D3DLOCKED_RECT rectangle;
1368      if (FAILED(surface->LockRect(&rectangle, NULL, 0)))
1369        return 0;
1370
1371      // Convert to NV12 - Luma
1372      // TODO: Optimize this later using shaders/swscale/etc.
1373      uint8_t *s = picture->data[0];
1374      uint8_t* bits = (uint8_t*)(rectangle.pBits);
1375      for (unsigned y = 0; y < picture->iHeight; y++)
1376      {
1377        memcpy(bits, s, picture->iWidth);
1378        s += picture->iLineSize[0];
1379        bits += rectangle.Pitch;
1380      }
1381
1382      D3DSURFACE_DESC desc;
1383      if (FAILED(surface->GetDesc(&desc)))
1384        return 0;
1385
1386      // Convert to NV12 - Chroma
1387      for (unsigned y = 0; y < picture->iHeight/2; y++)
1388      {
1389        uint8_t *s_u = picture->data[1] + (y * picture->iLineSize[1]);
1390        uint8_t *s_v = picture->data[2] + (y * picture->iLineSize[2]);
1391        uint8_t *d_uv = ((uint8_t*)(rectangle.pBits)) + (desc.Height + y) * rectangle.Pitch;
1392        for (unsigned x = 0; x < picture->iWidth/2; x++)
1393        {
1394          *d_uv++ = *s_u++;
1395          *d_uv++ = *s_v++;
1396        }
1397      }
1398  
1399      if (FAILED(surface->UnlockRect()))
1400        return 0;
1401
1402      break;
1403    }
1404    
1405    default:
1406    {
1407      CLog::Log(LOGWARNING, "DXVA - colorspace not supported by processor, skipping frame");
1408      return 0;
1409    }
1410  }
1411
1412  if (!surface || !context)
1413    return 0;
1414
1415  m_time += 2;
1416
1417  surface->AddRef();
1418  context->Acquire();
1419
1420  SVideoSample vs = {};
1421  vs.sample.Start          = m_time;
1422  vs.sample.End            = 0; 
1423  vs.sample.SampleFormat   = m_desc.SampleFormat;
1424
1425  if (picture->iFlags & DVP_FLAG_INTERLACED)
1426  {
1427    if (picture->iFlags & DVP_FLAG_TOP_FIELD_FIRST)
1428      vs.sample.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedEvenFirst;
1429    else
1430      vs.sample.SampleFormat.SampleFormat = DXVA2_SampleFieldInterleavedOddFirst;
1431  }
1432  else
1433  {
1434    vs.sample.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
1435  }
1436
1437  vs.sample.PlanarAlpha    = DXVA2_Fixed32OpaqueAlpha();
1438  vs.sample.SampleData     = 0;
1439  vs.sample.SrcSurface     = surface;
1440
1441
1442  vs.context = context;
1443
1444  if(!m_sample.empty())
1445    m_sample.back().sample.End = vs.sample.Start;
1446
1447  m_sample.push_back(vs);
1448  if (m_sample.size() > m_size)
1449  {
1450    SAFE_RELEASE(m_sample.front().context);
1451    SAFE_RELEASE(m_sample.front().sample.SrcSurface);
1452    m_sample.pop_front();
1453  }
1454
1455  return m_time;
1456}
1457
1458static DXVA2_Fixed32 ConvertRange(const DXVA2_ValueRange& range, int value, int min, int max, int def)
1459{
1460  if(value > def)
1461    return DXVA2FloatToFixed( DXVA2FixedToFloat(range.DefaultValue)
1462                            + (DXVA2FixedToFloat(range.MaxValue) - DXVA2FixedToFloat(range.DefaultValue))
1463                            * (value - def) / (max - def) );
1464  else if(value < def)
1465    return DXVA2FloatToFixed( DXVA2FixedToFloat(range.DefaultValue)
1466                            + (DXVA2FixedToFloat(range.MinValue) - DXVA2FixedToFloat(range.DefaultValue)) 
1467                            * (value - def) / (min - def) );
1468  else
1469    return range.DefaultValue;
1470}
1471
1472bool CProcessor::Render(CRect src, CRect dst, IDirect3DSurface9* target, REFERENCE_TIME time, DWORD flags)
1473{
1474  CSingleLock lock(m_section);
1475
1476  // With auto deinterlacing, the Ion Gen. 1 drops some frames with deinterlacing processor + progressive flags for progressive material.
1477  // For that GPU (or when specified by an advanced setting), use the progressive processor.
1478  // This is at the expense of the switch speed when video interlacing flags change and a deinterlacing processor is actually required.
1479  EDEINTERLACEMODE mode = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
1480  if (g_advancedSettings.m_DXVANoDeintProcForProgressive || m_quirk_nodeintprocforprog)
1481    mode = (flags & RENDER_FLAG_FIELD0 || flags & RENDER_FLAG_FIELD1) ? VS_DEINTERLACEMODE_FORCE : VS_DEINTERLACEMODE_OFF;
1482  EINTERLACEMETHOD method = g_renderManager.AutoInterlaceMethod(CMediaSettings::Get().GetCurrentVideoSettings().m_InterlaceMethod);
1483  if(m_interlace_method != method
1484  || m_deinterlace_mode != mode
1485  || !m_process)
1486  {
1487    m_deinterlace_mode = mode;
1488    m_interlace_method = method;
1489
1490    if (!OpenProcessor())
1491      return false;
1492  }
1493  
1494  // MinTime and MaxTime are the first and last samples to keep. Delete the rest.
1495  REFERENCE_TIME MinTime = time - m_max_back_refs*2;
1496  REFERENCE_TIME MaxTime = time + m_max_fwd_refs*2;
1497
1498  SSamples::iterator it = m_sample.begin();
1499  while (it != m_sample.end())
1500  {
1501    if (it->sample.Start < MinTime)
1502    {
1503      SAFE_RELEASE(it->context);
1504      SAFE_RELEASE(it->sample.SrcSurface);
1505      it = m_sample.erase(it);
1506    }
1507    else
1508      ++it;
1509  }
1510
1511  if(m_sample.empty())
1512    return false;
1513
1514  // MinTime and MaxTime are now the first and last samples to feed the processor.
1515  MinTime = time - m_caps.NumBackwardRefSamples*2;
1516  MaxTime = time + m_caps.NumForwardRefSamples*2;
1517
1518  D3DSURFACE_DESC desc;
1519  CHECK(target->GetDesc(&desc));
1520  CRect rectTarget(0, 0, desc.Width, desc.Height);
1521  CWIN32Util::CropSource(src, dst, rectTarget);
1522  RECT sourceRECT = { src.x1, src.y1, src.x2, src.y2 };
1523  RECT dstRECT    = { dst.x1, dst.y1, dst.x2, dst.y2 };
1524
1525
1526  // How to prepare the samples array for VideoProcessBlt
1527  // - always provide current picture + the number of forward and backward references required by the current processor.
1528  // - provide the surfaces in the array in increasing temporal order
1529  // - at the start of playback, there may not be enough samples available. Use SampleFormat.SampleFormat = DXVA2_SampleUnknown for the missing samples.
1530
1531  int count = 1 + m_caps.NumBackwardRefSamples + m_caps.NumForwardRefSamples;
1532  int valid = 0;
1533  auto_aptr<DXVA2_VideoSample> samp(new DXVA2_VideoSample[count]);
1534
1535  for (int i = 0; i < count; i++)
1536    samp[i].SampleFormat.SampleFormat = DXVA2_SampleUnknown;
1537
1538  for(it = m_sample.begin(); it != m_sample.end() && valid < count; ++it)
1539  {
1540    if (it->sample.Start >= MinTime && it->sample.Start <= MaxTime)
1541    {
1542      DXVA2_VideoSample& vs = samp[(it->sample.Start - MinTime) / 2];
1543      vs = it->sample;
1544      vs.SrcRect = sourceRECT;
1545      vs.DstRect = dstRECT;
1546      if(vs.End == 0)
1547        vs.End = vs.Start + 2;
1548
1549      // Override the sample format when the processor doesn't need to deinterlace or when deinterlacing is forced and flags are missing.
1550      if (m_progressive)
1551        vs.SampleFormat.SampleFormat = DXVA2_SampleProgressiveFrame;
1552      else if (m_deinterlace_mode == VS_DEINTERLACEMODE_FORCE && vs.Sa

Large files files are truncated, but you can click here to view the full file