/src/core/Processor.cpp

http://github.com/imageworks/OpenColorIO · C++ · 640 lines · 447 code · 135 blank · 58 comment · 58 complexity · 9caaa3022926517866979b3e2dd83f3d MD5 · raw file

  1. /*
  2. Copyright (c) 2003-2010 Sony Pictures Imageworks Inc., et al.
  3. All Rights Reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions are
  6. met:
  7. * Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. * Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in the
  11. documentation and/or other materials provided with the distribution.
  12. * Neither the name of Sony Pictures Imageworks nor the names of its
  13. contributors may be used to endorse or promote products derived from
  14. this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  16. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  17. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  18. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  19. OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  20. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  21. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <OpenColorIO/OpenColorIO.h>
  28. #include "AllocationOp.h"
  29. #include "GpuShaderUtils.h"
  30. #include "HashUtils.h"
  31. #include "Logging.h"
  32. #include "Lut3DOp.h"
  33. #include "NoOps.h"
  34. #include "OpBuilders.h"
  35. #include "Processor.h"
  36. #include "ScanlineHelper.h"
  37. #include <algorithm>
  38. #include <cstring>
  39. #include <sstream>
  40. OCIO_NAMESPACE_ENTER
  41. {
  42. //////////////////////////////////////////////////////////////////////////
  43. class ProcessorMetadata::Impl
  44. {
  45. public:
  46. StringSet files;
  47. StringVec looks;
  48. Impl()
  49. { }
  50. ~Impl()
  51. { }
  52. };
  53. ProcessorMetadataRcPtr ProcessorMetadata::Create()
  54. {
  55. return ProcessorMetadataRcPtr(new ProcessorMetadata(), &deleter);
  56. }
  57. ProcessorMetadata::ProcessorMetadata()
  58. : m_impl(new ProcessorMetadata::Impl)
  59. { }
  60. ProcessorMetadata::~ProcessorMetadata()
  61. {
  62. delete m_impl;
  63. m_impl = NULL;
  64. }
  65. void ProcessorMetadata::deleter(ProcessorMetadata* c)
  66. {
  67. delete c;
  68. }
  69. int ProcessorMetadata::getNumFiles() const
  70. {
  71. return static_cast<int>(getImpl()->files.size());
  72. }
  73. const char * ProcessorMetadata::getFile(int index) const
  74. {
  75. if(index < 0 ||
  76. index >= (static_cast<int>(getImpl()->files.size())))
  77. {
  78. return "";
  79. }
  80. StringSet::const_iterator iter = getImpl()->files.begin();
  81. std::advance( iter, index );
  82. return iter->c_str();
  83. }
  84. void ProcessorMetadata::addFile(const char * fname)
  85. {
  86. getImpl()->files.insert(fname);
  87. }
  88. int ProcessorMetadata::getNumLooks() const
  89. {
  90. return static_cast<int>(getImpl()->looks.size());
  91. }
  92. const char * ProcessorMetadata::getLook(int index) const
  93. {
  94. if(index < 0 ||
  95. index >= (static_cast<int>(getImpl()->looks.size())))
  96. {
  97. return "";
  98. }
  99. return getImpl()->looks[index].c_str();
  100. }
  101. void ProcessorMetadata::addLook(const char * look)
  102. {
  103. getImpl()->looks.push_back(look);
  104. }
  105. //////////////////////////////////////////////////////////////////////////
  106. ProcessorRcPtr Processor::Create()
  107. {
  108. return ProcessorRcPtr(new Processor(), &deleter);
  109. }
  110. void Processor::deleter(Processor* c)
  111. {
  112. delete c;
  113. }
  114. Processor::Processor()
  115. : m_impl(new Processor::Impl)
  116. {
  117. }
  118. Processor::~Processor()
  119. {
  120. delete m_impl;
  121. m_impl = NULL;
  122. }
  123. bool Processor::isNoOp() const
  124. {
  125. return getImpl()->isNoOp();
  126. }
  127. bool Processor::hasChannelCrosstalk() const
  128. {
  129. return getImpl()->hasChannelCrosstalk();
  130. }
  131. ConstProcessorMetadataRcPtr Processor::getMetadata() const
  132. {
  133. return getImpl()->getMetadata();
  134. }
  135. void Processor::apply(ImageDesc& img) const
  136. {
  137. getImpl()->apply(img);
  138. }
  139. void Processor::applyRGB(float * pixel) const
  140. {
  141. getImpl()->applyRGB(pixel);
  142. }
  143. void Processor::applyRGBA(float * pixel) const
  144. {
  145. getImpl()->applyRGBA(pixel);
  146. }
  147. const char * Processor::getCpuCacheID() const
  148. {
  149. return getImpl()->getCpuCacheID();
  150. }
  151. const char * Processor::getGpuShaderText(const GpuShaderDesc & shaderDesc) const
  152. {
  153. return getImpl()->getGpuShaderText(shaderDesc);
  154. }
  155. const char * Processor::getGpuShaderTextCacheID(const GpuShaderDesc & shaderDesc) const
  156. {
  157. return getImpl()->getGpuShaderTextCacheID(shaderDesc);
  158. }
  159. void Processor::getGpuLut3D(float* lut3d, const GpuShaderDesc & shaderDesc) const
  160. {
  161. return getImpl()->getGpuLut3D(lut3d, shaderDesc);
  162. }
  163. const char * Processor::getGpuLut3DCacheID(const GpuShaderDesc & shaderDesc) const
  164. {
  165. return getImpl()->getGpuLut3DCacheID(shaderDesc);
  166. }
  167. //////////////////////////////////////////////////////////////////////////
  168. namespace
  169. {
  170. void WriteShaderHeader(std::ostream & shader,
  171. const std::string & pixelName,
  172. const GpuShaderDesc & shaderDesc)
  173. {
  174. if(!shader) return;
  175. std::string lut3dName = "lut3d";
  176. shader << "\n// Generated by OpenColorIO\n\n";
  177. GpuLanguage lang = shaderDesc.getLanguage();
  178. std::string fcnName = shaderDesc.getFunctionName();
  179. if(lang == GPU_LANGUAGE_CG)
  180. {
  181. shader << "half4 " << fcnName << "(in half4 inPixel," << "\n";
  182. shader << " const uniform sampler3D " << lut3dName << ") \n";
  183. }
  184. else if(lang == GPU_LANGUAGE_GLSL_1_0)
  185. {
  186. shader << "vec4 " << fcnName << "(vec4 inPixel, \n";
  187. shader << " sampler3D " << lut3dName << ") \n";
  188. }
  189. else if(lang == GPU_LANGUAGE_GLSL_1_3)
  190. {
  191. shader << "vec4 " << fcnName << "(in vec4 inPixel, \n";
  192. shader << " const sampler3D " << lut3dName << ") \n";
  193. }
  194. else throw Exception("Unsupported shader language.");
  195. shader << "{" << "\n";
  196. if(lang == GPU_LANGUAGE_CG)
  197. {
  198. shader << "half4 " << pixelName << " = inPixel; \n";
  199. }
  200. else if(lang == GPU_LANGUAGE_GLSL_1_0 || lang == GPU_LANGUAGE_GLSL_1_3)
  201. {
  202. shader << "vec4 " << pixelName << " = inPixel; \n";
  203. }
  204. else throw Exception("Unsupported shader language.");
  205. }
  206. void WriteShaderFooter(std::ostream & shader,
  207. const std::string & pixelName,
  208. const GpuShaderDesc & /*shaderDesc*/)
  209. {
  210. shader << "return " << pixelName << ";\n";
  211. shader << "}" << "\n\n";
  212. }
  213. }
  214. //////////////////////////////////////////////////////////////////////////
  215. Processor::Impl::Impl():
  216. m_metadata(ProcessorMetadata::Create())
  217. {
  218. }
  219. Processor::Impl::~Impl()
  220. { }
  221. bool Processor::Impl::isNoOp() const
  222. {
  223. return IsOpVecNoOp(m_cpuOps);
  224. }
  225. bool Processor::Impl::hasChannelCrosstalk() const
  226. {
  227. for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
  228. {
  229. if(m_cpuOps[i]->hasChannelCrosstalk()) return true;
  230. }
  231. return false;
  232. }
  233. ConstProcessorMetadataRcPtr Processor::Impl::getMetadata() const
  234. {
  235. return m_metadata;
  236. }
  237. void Processor::Impl::apply(ImageDesc& img) const
  238. {
  239. if(m_cpuOps.empty()) return;
  240. ScanlineHelper scanlineHelper(img);
  241. float * rgbaBuffer = 0;
  242. long numPixels = 0;
  243. while(true)
  244. {
  245. scanlineHelper.prepRGBAScanline(&rgbaBuffer, &numPixels);
  246. if(numPixels == 0) break;
  247. if(!rgbaBuffer)
  248. throw Exception("Cannot apply transform; null image.");
  249. for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
  250. {
  251. m_cpuOps[i]->apply(rgbaBuffer, numPixels);
  252. }
  253. scanlineHelper.finishRGBAScanline();
  254. }
  255. }
  256. void Processor::Impl::applyRGB(float * pixel) const
  257. {
  258. if(m_cpuOps.empty()) return;
  259. // We need to allocate a temp array as the pixel must be 4 floats in size
  260. // (otherwise, sse loads will potentially fail)
  261. float rgbaBuffer[4] = { pixel[0], pixel[1], pixel[2], 0.0f };
  262. for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
  263. {
  264. m_cpuOps[i]->apply(rgbaBuffer, 1);
  265. }
  266. pixel[0] = rgbaBuffer[0];
  267. pixel[1] = rgbaBuffer[1];
  268. pixel[2] = rgbaBuffer[2];
  269. }
  270. void Processor::Impl::applyRGBA(float * pixel) const
  271. {
  272. for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
  273. {
  274. m_cpuOps[i]->apply(pixel, 1);
  275. }
  276. }
  277. const char * Processor::Impl::getCpuCacheID() const
  278. {
  279. AutoMutex lock(m_resultsCacheMutex);
  280. if(!m_cpuCacheID.empty()) return m_cpuCacheID.c_str();
  281. if(m_cpuOps.empty())
  282. {
  283. m_cpuCacheID = "<NOOP>";
  284. }
  285. else
  286. {
  287. std::ostringstream cacheid;
  288. for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
  289. {
  290. cacheid << m_cpuOps[i]->getCacheID() << " ";
  291. }
  292. std::string fullstr = cacheid.str();
  293. m_cpuCacheID = CacheIDHash(fullstr.c_str(), (int)fullstr.size());
  294. }
  295. return m_cpuCacheID.c_str();
  296. }
  297. ///////////////////////////////////////////////////////////////////////////
  298. const char * Processor::Impl::getGpuShaderText(const GpuShaderDesc & shaderDesc) const
  299. {
  300. AutoMutex lock(m_resultsCacheMutex);
  301. if(m_lastShaderDesc != shaderDesc.getCacheID())
  302. {
  303. m_lastShaderDesc = shaderDesc.getCacheID();
  304. m_shader = "";
  305. m_shaderCacheID = "";
  306. m_lut3D.clear();
  307. m_lut3DCacheID = "";
  308. }
  309. if(m_shader.empty())
  310. {
  311. std::ostringstream shader;
  312. calcGpuShaderText(shader, shaderDesc);
  313. m_shader = shader.str();
  314. if(IsDebugLoggingEnabled())
  315. {
  316. LogDebug("GPU Shader");
  317. LogDebug(m_shader);
  318. }
  319. }
  320. return m_shader.c_str();
  321. }
  322. const char * Processor::Impl::getGpuShaderTextCacheID(const GpuShaderDesc & shaderDesc) const
  323. {
  324. AutoMutex lock(m_resultsCacheMutex);
  325. if(m_lastShaderDesc != shaderDesc.getCacheID())
  326. {
  327. m_lastShaderDesc = shaderDesc.getCacheID();
  328. m_shader = "";
  329. m_shaderCacheID = "";
  330. m_lut3D.clear();
  331. m_lut3DCacheID = "";
  332. }
  333. if(m_shader.empty())
  334. {
  335. std::ostringstream shader;
  336. calcGpuShaderText(shader, shaderDesc);
  337. m_shader = shader.str();
  338. }
  339. if(m_shaderCacheID.empty())
  340. {
  341. m_shaderCacheID = CacheIDHash(m_shader.c_str(), (int)m_shader.size());
  342. }
  343. return m_shaderCacheID.c_str();
  344. }
  345. const char * Processor::Impl::getGpuLut3DCacheID(const GpuShaderDesc & shaderDesc) const
  346. {
  347. AutoMutex lock(m_resultsCacheMutex);
  348. if(m_lastShaderDesc != shaderDesc.getCacheID())
  349. {
  350. m_lastShaderDesc = shaderDesc.getCacheID();
  351. m_shader = "";
  352. m_shaderCacheID = "";
  353. m_lut3D.clear();
  354. m_lut3DCacheID = "";
  355. }
  356. if(m_lut3DCacheID.empty())
  357. {
  358. if(m_gpuOpsCpuLatticeProcess.empty())
  359. {
  360. m_lut3DCacheID = "<NULL>";
  361. }
  362. else
  363. {
  364. std::ostringstream cacheid;
  365. for(OpRcPtrVec::size_type i=0, size = m_gpuOpsCpuLatticeProcess.size(); i<size; ++i)
  366. {
  367. cacheid << m_gpuOpsCpuLatticeProcess[i]->getCacheID() << " ";
  368. }
  369. // Also, add a hash of the shader description
  370. cacheid << shaderDesc.getCacheID();
  371. std::string fullstr = cacheid.str();
  372. m_lut3DCacheID = CacheIDHash(fullstr.c_str(), (int)fullstr.size());
  373. }
  374. }
  375. return m_lut3DCacheID.c_str();
  376. }
  377. void Processor::Impl::getGpuLut3D(float* lut3d, const GpuShaderDesc & shaderDesc) const
  378. {
  379. if(!lut3d) return;
  380. AutoMutex lock(m_resultsCacheMutex);
  381. if(m_lastShaderDesc != shaderDesc.getCacheID())
  382. {
  383. m_lastShaderDesc = shaderDesc.getCacheID();
  384. m_shader = "";
  385. m_shaderCacheID = "";
  386. m_lut3D.clear();
  387. m_lut3DCacheID = "";
  388. }
  389. int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
  390. int lut3DNumPixels = lut3DEdgeLen*lut3DEdgeLen*lut3DEdgeLen;
  391. // Can we write the entire shader using only shader text?
  392. // If so, the lut3D is not needed so clear it.
  393. // This is preferable to identity, as it lets people notice if
  394. // it's accidentally being used.
  395. if(m_gpuOpsCpuLatticeProcess.empty())
  396. {
  397. memset(lut3d, 0, sizeof(float) * 3 * lut3DNumPixels);
  398. return;
  399. }
  400. if(m_lut3D.empty())
  401. {
  402. // Allocate 3dlut image, RGBA
  403. m_lut3D.resize(lut3DNumPixels*4);
  404. GenerateIdentityLut3D(&m_lut3D[0], lut3DEdgeLen, 4, LUT3DORDER_FAST_RED);
  405. // Apply the lattice ops to it
  406. for(int i=0; i<(int)m_gpuOpsCpuLatticeProcess.size(); ++i)
  407. {
  408. m_gpuOpsCpuLatticeProcess[i]->apply(&m_lut3D[0], lut3DNumPixels);
  409. }
  410. // Convert the RGBA image to an RGB image, in place.
  411. // Of course, this only works because we're doing it from left to right
  412. // so old pixels are read before they're written over
  413. // TODO: is this bad for memory access patterns?
  414. // see if this is faster with a 2nd temp float array
  415. for(int i=1; i<lut3DNumPixels; ++i) // skip the 1st pixel, it's ok.
  416. {
  417. m_lut3D[3*i+0] = m_lut3D[4*i+0];
  418. m_lut3D[3*i+1] = m_lut3D[4*i+1];
  419. m_lut3D[3*i+2] = m_lut3D[4*i+2];
  420. }
  421. }
  422. // Copy to the destination
  423. memcpy(lut3d, &m_lut3D[0], sizeof(float) * 3 * lut3DNumPixels);
  424. }
  425. ///////////////////////////////////////////////////////////////////////////
  426. void Processor::Impl::addColorSpaceConversion(const Config & config,
  427. const ConstContextRcPtr & context,
  428. const ConstColorSpaceRcPtr & srcColorSpace,
  429. const ConstColorSpaceRcPtr & dstColorSpace)
  430. {
  431. BuildColorSpaceOps(m_cpuOps, config, context, srcColorSpace, dstColorSpace);
  432. }
  433. void Processor::Impl::addTransform(const Config & config,
  434. const ConstContextRcPtr & context,
  435. const ConstTransformRcPtr& transform,
  436. TransformDirection direction)
  437. {
  438. BuildOps(m_cpuOps, config, context, transform, direction);
  439. }
  440. void Processor::Impl::finalize()
  441. {
  442. // Pull out metadata, before the no-ops are removed.
  443. for(unsigned int i=0; i<m_cpuOps.size(); ++i)
  444. {
  445. m_cpuOps[i]->dumpMetadata(m_metadata);
  446. }
  447. // GPU Process setup
  448. //
  449. // Partition the original, raw opvec into 3 segments for GPU Processing
  450. //
  451. // Interior index range does not support the gpu shader.
  452. // This is used to bound our analytical shader text generation
  453. // start index and end index are inclusive.
  454. PartitionGPUOps(m_gpuOpsHwPreProcess,
  455. m_gpuOpsCpuLatticeProcess,
  456. m_gpuOpsHwPostProcess,
  457. m_cpuOps);
  458. LogDebug("GPU Ops: Pre-3DLUT");
  459. FinalizeOpVec(m_gpuOpsHwPreProcess);
  460. LogDebug("GPU Ops: 3DLUT");
  461. FinalizeOpVec(m_gpuOpsCpuLatticeProcess);
  462. LogDebug("GPU Ops: Post-3DLUT");
  463. FinalizeOpVec(m_gpuOpsHwPostProcess);
  464. LogDebug("CPU Ops");
  465. FinalizeOpVec(m_cpuOps);
  466. }
  467. void Processor::Impl::calcGpuShaderText(std::ostream & shader,
  468. const GpuShaderDesc & shaderDesc) const
  469. {
  470. std::string pixelName = "out_pixel";
  471. std::string lut3dName = "lut3d";
  472. WriteShaderHeader(shader, pixelName, shaderDesc);
  473. for(unsigned int i=0; i<m_gpuOpsHwPreProcess.size(); ++i)
  474. {
  475. m_gpuOpsHwPreProcess[i]->writeGpuShader(shader, pixelName, shaderDesc);
  476. }
  477. if(!m_gpuOpsCpuLatticeProcess.empty())
  478. {
  479. // Sample the 3D LUT.
  480. int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
  481. shader << pixelName << ".rgb = ";
  482. Write_sampleLut3D_rgb(shader, pixelName,
  483. lut3dName, lut3DEdgeLen,
  484. shaderDesc.getLanguage());
  485. }
  486. #ifdef __APPLE__
  487. else
  488. {
  489. // Force a no-op sampling of the 3d lut on OSX to work around a segfault.
  490. int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
  491. shader << "// OSX segfault work-around: Force a no-op sampling of the 3d lut.\n";
  492. Write_sampleLut3D_rgb(shader, pixelName,
  493. lut3dName, lut3DEdgeLen,
  494. shaderDesc.getLanguage());
  495. }
  496. #endif // __APPLE__
  497. for(unsigned int i=0; i<m_gpuOpsHwPostProcess.size(); ++i)
  498. {
  499. m_gpuOpsHwPostProcess[i]->writeGpuShader(shader, pixelName, shaderDesc);
  500. }
  501. WriteShaderFooter(shader, pixelName, shaderDesc);
  502. }
  503. }
  504. OCIO_NAMESPACE_EXIT