PageRenderTime 74ms CodeModel.GetById 10ms app.highlight 59ms RepoModel.GetById 1ms app.codeStats 0ms

/src/core/Processor.cpp

http://github.com/imageworks/OpenColorIO
C++ | 640 lines | 447 code | 135 blank | 58 comment | 57 complexity | 9caaa3022926517866979b3e2dd83f3d MD5 | raw file
  1/*
  2Copyright (c) 2003-2010 Sony Pictures Imageworks Inc., et al.
  3All Rights Reserved.
  4
  5Redistribution and use in source and binary forms, with or without
  6modification, are permitted provided that the following conditions are
  7met:
  8* Redistributions of source code must retain the above copyright
  9  notice, this list of conditions and the following disclaimer.
 10* Redistributions in binary form must reproduce the above copyright
 11  notice, this list of conditions and the following disclaimer in the
 12  documentation and/or other materials provided with the distribution.
 13* Neither the name of Sony Pictures Imageworks nor the names of its
 14  contributors may be used to endorse or promote products derived from
 15  this software without specific prior written permission.
 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 17"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 18LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 19A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 20OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 21SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 22LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 23DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 24THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 25(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 26OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27*/
 28
 29#include <OpenColorIO/OpenColorIO.h>
 30
 31#include "AllocationOp.h"
 32#include "GpuShaderUtils.h"
 33#include "HashUtils.h"
 34#include "Logging.h"
 35#include "Lut3DOp.h"
 36#include "NoOps.h"
 37#include "OpBuilders.h"
 38#include "Processor.h"
 39#include "ScanlineHelper.h"
 40
 41#include <algorithm>
 42#include <cstring>
 43#include <sstream>
 44
 45OCIO_NAMESPACE_ENTER
 46{
 47
 48
 49
 50    //////////////////////////////////////////////////////////////////////////
 51    
 52    class ProcessorMetadata::Impl
 53    {
 54    public:
 55        StringSet files;
 56        StringVec looks;
 57        
 58        Impl()
 59        { }
 60        
 61        ~Impl()
 62        { }
 63    };
 64    
 65    ProcessorMetadataRcPtr ProcessorMetadata::Create()
 66    {
 67        return ProcessorMetadataRcPtr(new ProcessorMetadata(), &deleter);
 68    }
 69    
 70    ProcessorMetadata::ProcessorMetadata()
 71        : m_impl(new ProcessorMetadata::Impl)
 72    { }
 73    
 74    ProcessorMetadata::~ProcessorMetadata()
 75    {
 76        delete m_impl;
 77        m_impl = NULL;
 78    }
 79    
 80    void ProcessorMetadata::deleter(ProcessorMetadata* c)
 81    {
 82        delete c;
 83    }
 84    
 85    int ProcessorMetadata::getNumFiles() const
 86    {
 87        return static_cast<int>(getImpl()->files.size());
 88    }
 89    
 90    const char * ProcessorMetadata::getFile(int index) const
 91    {
 92        if(index < 0 ||
 93           index >= (static_cast<int>(getImpl()->files.size())))
 94        {
 95            return "";
 96        }
 97        
 98        StringSet::const_iterator iter = getImpl()->files.begin();
 99        std::advance( iter, index );
100        
101        return iter->c_str();
102    }
103    
104    void ProcessorMetadata::addFile(const char * fname)
105    {
106        getImpl()->files.insert(fname);
107    }
108    
109    
110    
111    int ProcessorMetadata::getNumLooks() const
112    {
113        return static_cast<int>(getImpl()->looks.size());
114    }
115    
116    const char * ProcessorMetadata::getLook(int index) const
117    {
118        if(index < 0 ||
119           index >= (static_cast<int>(getImpl()->looks.size())))
120        {
121            return "";
122        }
123        
124        return getImpl()->looks[index].c_str();
125    }
126    
127    void ProcessorMetadata::addLook(const char * look)
128    {
129        getImpl()->looks.push_back(look);
130    }
131    
132    
133    
134    //////////////////////////////////////////////////////////////////////////
135    
136    
137    ProcessorRcPtr Processor::Create()
138    {
139        return ProcessorRcPtr(new Processor(), &deleter);
140    }
141    
142    void Processor::deleter(Processor* c)
143    {
144        delete c;
145    }
146    
147    Processor::Processor()
148    : m_impl(new Processor::Impl)
149    {
150    }
151    
152    Processor::~Processor()
153    {
154        delete m_impl;
155        m_impl = NULL;
156    }
157    
158    bool Processor::isNoOp() const
159    {
160        return getImpl()->isNoOp();
161    }
162    
163    bool Processor::hasChannelCrosstalk() const
164    {
165        return getImpl()->hasChannelCrosstalk();
166    }
167    
168    ConstProcessorMetadataRcPtr Processor::getMetadata() const
169    {
170        return getImpl()->getMetadata();
171    }
172    
173    void Processor::apply(ImageDesc& img) const
174    {
175        getImpl()->apply(img);
176    }
177    void Processor::applyRGB(float * pixel) const
178    {
179        getImpl()->applyRGB(pixel);
180    }
181    
182    void Processor::applyRGBA(float * pixel) const
183    {
184        getImpl()->applyRGBA(pixel);
185    }
186    
187    const char * Processor::getCpuCacheID() const
188    {
189        return getImpl()->getCpuCacheID();
190    }
191    
192    const char * Processor::getGpuShaderText(const GpuShaderDesc & shaderDesc) const
193    {
194        return getImpl()->getGpuShaderText(shaderDesc);
195    }
196    
197    const char * Processor::getGpuShaderTextCacheID(const GpuShaderDesc & shaderDesc) const
198    {
199        return getImpl()->getGpuShaderTextCacheID(shaderDesc);
200    }
201    
202    void Processor::getGpuLut3D(float* lut3d, const GpuShaderDesc & shaderDesc) const
203    {
204        return getImpl()->getGpuLut3D(lut3d, shaderDesc);
205    }
206    
207    const char * Processor::getGpuLut3DCacheID(const GpuShaderDesc & shaderDesc) const
208    {
209        return getImpl()->getGpuLut3DCacheID(shaderDesc);
210    }
211    
212    
213    
214    //////////////////////////////////////////////////////////////////////////
215    
216    
217    
218    namespace
219    {
220        void WriteShaderHeader(std::ostream & shader,
221                               const std::string & pixelName,
222                               const GpuShaderDesc & shaderDesc)
223        {
224            if(!shader) return;
225            
226            std::string lut3dName = "lut3d";
227            
228            shader << "\n// Generated by OpenColorIO\n\n";
229            
230            GpuLanguage lang = shaderDesc.getLanguage();
231            
232            std::string fcnName = shaderDesc.getFunctionName();
233            
234            if(lang == GPU_LANGUAGE_CG)
235            {
236                shader << "half4 " << fcnName << "(in half4 inPixel," << "\n";
237                shader << "    const uniform sampler3D " << lut3dName << ") \n";
238            }
239            else if(lang == GPU_LANGUAGE_GLSL_1_0)
240            {
241                shader << "vec4 " << fcnName << "(vec4 inPixel, \n";
242                shader << "    sampler3D " << lut3dName << ") \n";
243            }
244            else if(lang == GPU_LANGUAGE_GLSL_1_3)
245            {
246                shader << "vec4 " << fcnName << "(in vec4 inPixel, \n";
247                shader << "    const sampler3D " << lut3dName << ") \n";
248            }
249            else throw Exception("Unsupported shader language.");
250            
251            shader << "{" << "\n";
252            
253            if(lang == GPU_LANGUAGE_CG)
254            {
255                shader << "half4 " << pixelName << " = inPixel; \n";
256            }
257            else if(lang == GPU_LANGUAGE_GLSL_1_0 || lang == GPU_LANGUAGE_GLSL_1_3)
258            {
259                shader << "vec4 " << pixelName << " = inPixel; \n";
260            }
261            else throw Exception("Unsupported shader language.");
262        }
263        
264        
265        void WriteShaderFooter(std::ostream & shader,
266                               const std::string & pixelName,
267                               const GpuShaderDesc & /*shaderDesc*/)
268        {
269            shader << "return " << pixelName << ";\n";
270            shader << "}" << "\n\n";
271        }
272    }
273    
274    
275    //////////////////////////////////////////////////////////////////////////
276    
277    
278    Processor::Impl::Impl():
279        m_metadata(ProcessorMetadata::Create())
280    {
281    }
282    
283    Processor::Impl::~Impl()
284    { }
285    
286    bool Processor::Impl::isNoOp() const
287    {
288        return IsOpVecNoOp(m_cpuOps);
289    }
290    
291    bool Processor::Impl::hasChannelCrosstalk() const
292    {
293        for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
294        {
295            if(m_cpuOps[i]->hasChannelCrosstalk()) return true;
296        }
297        
298        return false;
299    }
300    
301    ConstProcessorMetadataRcPtr Processor::Impl::getMetadata() const
302    {
303        return m_metadata;
304    }
305    
306    void Processor::Impl::apply(ImageDesc& img) const
307    {
308        if(m_cpuOps.empty()) return;
309        
310        ScanlineHelper scanlineHelper(img);
311        float * rgbaBuffer = 0;
312        long numPixels = 0;
313        
314        while(true)
315        {
316            scanlineHelper.prepRGBAScanline(&rgbaBuffer, &numPixels);
317            if(numPixels == 0) break;
318            if(!rgbaBuffer)
319                throw Exception("Cannot apply transform; null image.");
320            
321            for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
322            {
323                m_cpuOps[i]->apply(rgbaBuffer, numPixels);
324            }
325            
326            scanlineHelper.finishRGBAScanline();
327        }
328    }
329    
330    void Processor::Impl::applyRGB(float * pixel) const
331    {
332        if(m_cpuOps.empty()) return;
333        
334        // We need to allocate a temp array as the pixel must be 4 floats in size
335        // (otherwise, sse loads will potentially fail)
336        
337        float rgbaBuffer[4] = { pixel[0], pixel[1], pixel[2], 0.0f };
338        
339        for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
340        {
341            m_cpuOps[i]->apply(rgbaBuffer, 1);
342        }
343        
344        pixel[0] = rgbaBuffer[0];
345        pixel[1] = rgbaBuffer[1];
346        pixel[2] = rgbaBuffer[2];
347    }
348    
349    void Processor::Impl::applyRGBA(float * pixel) const
350    {
351        for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
352        {
353            m_cpuOps[i]->apply(pixel, 1);
354        }
355    }
356    
357    const char * Processor::Impl::getCpuCacheID() const
358    {
359        AutoMutex lock(m_resultsCacheMutex);
360        
361        if(!m_cpuCacheID.empty()) return m_cpuCacheID.c_str();
362        
363        if(m_cpuOps.empty())
364        {
365            m_cpuCacheID = "<NOOP>";
366        }
367        else
368        {
369            std::ostringstream cacheid;
370            for(OpRcPtrVec::size_type i=0, size = m_cpuOps.size(); i<size; ++i)
371            {
372                cacheid << m_cpuOps[i]->getCacheID() << " ";
373            }
374            std::string fullstr = cacheid.str();
375            
376            m_cpuCacheID = CacheIDHash(fullstr.c_str(), (int)fullstr.size());
377        }
378        
379        return m_cpuCacheID.c_str();
380    }
381    
382    
383    ///////////////////////////////////////////////////////////////////////////
384    
385    
386    
387    
388    const char * Processor::Impl::getGpuShaderText(const GpuShaderDesc & shaderDesc) const
389    {
390        AutoMutex lock(m_resultsCacheMutex);
391        
392        if(m_lastShaderDesc != shaderDesc.getCacheID())
393        {
394            m_lastShaderDesc = shaderDesc.getCacheID();
395            m_shader = "";
396            m_shaderCacheID = "";
397            m_lut3D.clear();
398            m_lut3DCacheID = "";
399        }
400        
401        if(m_shader.empty())
402        {
403            std::ostringstream shader;
404            calcGpuShaderText(shader, shaderDesc);
405            m_shader = shader.str();
406            
407            if(IsDebugLoggingEnabled())
408            {
409                LogDebug("GPU Shader");
410                LogDebug(m_shader);
411            }
412        }
413        
414        return m_shader.c_str();
415    }
416    
417    const char * Processor::Impl::getGpuShaderTextCacheID(const GpuShaderDesc & shaderDesc) const
418    {
419        AutoMutex lock(m_resultsCacheMutex);
420        
421        if(m_lastShaderDesc != shaderDesc.getCacheID())
422        {
423            m_lastShaderDesc = shaderDesc.getCacheID();
424            m_shader = "";
425            m_shaderCacheID = "";
426            m_lut3D.clear();
427            m_lut3DCacheID = "";
428        }
429        
430        if(m_shader.empty())
431        {
432            std::ostringstream shader;
433            calcGpuShaderText(shader, shaderDesc);
434            m_shader = shader.str();
435        }
436        
437        if(m_shaderCacheID.empty())
438        {
439            m_shaderCacheID = CacheIDHash(m_shader.c_str(), (int)m_shader.size());
440        }
441        
442        return m_shaderCacheID.c_str();
443    }
444    
445    
446    const char * Processor::Impl::getGpuLut3DCacheID(const GpuShaderDesc & shaderDesc) const
447    {
448        AutoMutex lock(m_resultsCacheMutex);
449        
450        if(m_lastShaderDesc != shaderDesc.getCacheID())
451        {
452            m_lastShaderDesc = shaderDesc.getCacheID();
453            m_shader = "";
454            m_shaderCacheID = "";
455            m_lut3D.clear();
456            m_lut3DCacheID = "";
457        }
458        
459        if(m_lut3DCacheID.empty())
460        {
461            if(m_gpuOpsCpuLatticeProcess.empty())
462            {
463                m_lut3DCacheID = "<NULL>";
464            }
465            else
466            {
467                std::ostringstream cacheid;
468                for(OpRcPtrVec::size_type i=0, size = m_gpuOpsCpuLatticeProcess.size(); i<size; ++i)
469                {
470                    cacheid << m_gpuOpsCpuLatticeProcess[i]->getCacheID() << " ";
471                }
472                // Also, add a hash of the shader description
473                cacheid << shaderDesc.getCacheID();
474                std::string fullstr = cacheid.str();
475                m_lut3DCacheID = CacheIDHash(fullstr.c_str(), (int)fullstr.size());
476            }
477        }
478        
479        return m_lut3DCacheID.c_str();
480    }
481    
482    void Processor::Impl::getGpuLut3D(float* lut3d, const GpuShaderDesc & shaderDesc) const
483    {
484        if(!lut3d) return;
485        
486        AutoMutex lock(m_resultsCacheMutex);
487        
488        if(m_lastShaderDesc != shaderDesc.getCacheID())
489        {
490            m_lastShaderDesc = shaderDesc.getCacheID();
491            m_shader = "";
492            m_shaderCacheID = "";
493            m_lut3D.clear();
494            m_lut3DCacheID = "";
495        }
496        
497        int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
498        int lut3DNumPixels = lut3DEdgeLen*lut3DEdgeLen*lut3DEdgeLen;
499        
500        // Can we write the entire shader using only shader text?
501        // If so, the lut3D is not needed so clear it.
502        // This is preferable to identity, as it lets people notice if
503        // it's accidentally being used.
504        if(m_gpuOpsCpuLatticeProcess.empty())
505        {
506            memset(lut3d, 0, sizeof(float) * 3 * lut3DNumPixels);
507            return;
508        }
509        
510        if(m_lut3D.empty())
511        {
512            // Allocate 3dlut image, RGBA
513            m_lut3D.resize(lut3DNumPixels*4);
514            GenerateIdentityLut3D(&m_lut3D[0], lut3DEdgeLen, 4, LUT3DORDER_FAST_RED);
515            
516            // Apply the lattice ops to it
517            for(int i=0; i<(int)m_gpuOpsCpuLatticeProcess.size(); ++i)
518            {
519                m_gpuOpsCpuLatticeProcess[i]->apply(&m_lut3D[0], lut3DNumPixels);
520            }
521            
522            // Convert the RGBA image to an RGB image, in place.
523            // Of course, this only works because we're doing it from left to right
524            // so old pixels are read before they're written over
525            // TODO: is this bad for memory access patterns?
526            //       see if this is faster with a 2nd temp float array
527            
528            for(int i=1; i<lut3DNumPixels; ++i) // skip the 1st pixel, it's ok.
529            {
530                m_lut3D[3*i+0] = m_lut3D[4*i+0];
531                m_lut3D[3*i+1] = m_lut3D[4*i+1];
532                m_lut3D[3*i+2] = m_lut3D[4*i+2];
533            }
534        }
535        
536        // Copy to the destination
537        memcpy(lut3d, &m_lut3D[0], sizeof(float) * 3 * lut3DNumPixels);
538    }
539    
540    
541    
542    ///////////////////////////////////////////////////////////////////////////
543    
544    
545    
546    void Processor::Impl::addColorSpaceConversion(const Config & config,
547                                 const ConstContextRcPtr & context,
548                                 const ConstColorSpaceRcPtr & srcColorSpace,
549                                 const ConstColorSpaceRcPtr & dstColorSpace)
550    {
551        BuildColorSpaceOps(m_cpuOps, config, context, srcColorSpace, dstColorSpace);
552    }
553    
554    
555    void Processor::Impl::addTransform(const Config & config,
556                      const ConstContextRcPtr & context,
557                      const ConstTransformRcPtr& transform,
558                      TransformDirection direction)
559    {
560        BuildOps(m_cpuOps, config, context, transform, direction);
561    }
562    
563    void Processor::Impl::finalize()
564    {
565        // Pull out metadata, before the no-ops are removed.
566        for(unsigned int i=0; i<m_cpuOps.size(); ++i)
567        {
568            m_cpuOps[i]->dumpMetadata(m_metadata);
569        }
570        
571        // GPU Process setup
572        //
573        // Partition the original, raw opvec into 3 segments for GPU Processing
574        //
575        // Interior index range does not support the gpu shader.
576        // This is used to bound our analytical shader text generation
577        // start index and end index are inclusive.
578        
579        PartitionGPUOps(m_gpuOpsHwPreProcess,
580                        m_gpuOpsCpuLatticeProcess,
581                        m_gpuOpsHwPostProcess,
582                        m_cpuOps);
583        
584        LogDebug("GPU Ops: Pre-3DLUT");
585        FinalizeOpVec(m_gpuOpsHwPreProcess);
586        
587        LogDebug("GPU Ops: 3DLUT");
588        FinalizeOpVec(m_gpuOpsCpuLatticeProcess);
589        
590        LogDebug("GPU Ops: Post-3DLUT");
591        FinalizeOpVec(m_gpuOpsHwPostProcess);
592        
593        LogDebug("CPU Ops");
594        FinalizeOpVec(m_cpuOps);
595    }
596    
597    void Processor::Impl::calcGpuShaderText(std::ostream & shader,
598                                            const GpuShaderDesc & shaderDesc) const
599    {
600        std::string pixelName = "out_pixel";
601        std::string lut3dName = "lut3d";
602        
603        WriteShaderHeader(shader, pixelName, shaderDesc);
604        
605        
606        for(unsigned int i=0; i<m_gpuOpsHwPreProcess.size(); ++i)
607        {
608            m_gpuOpsHwPreProcess[i]->writeGpuShader(shader, pixelName, shaderDesc);
609        }
610        
611        if(!m_gpuOpsCpuLatticeProcess.empty())
612        {
613            // Sample the 3D LUT.
614            int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
615            shader << pixelName << ".rgb = ";
616            Write_sampleLut3D_rgb(shader, pixelName,
617                                  lut3dName, lut3DEdgeLen,
618                                  shaderDesc.getLanguage());
619        }
620#ifdef __APPLE__
621        else
622        {
623            // Force a no-op sampling of the 3d lut on OSX to work around a segfault.
624            int lut3DEdgeLen = shaderDesc.getLut3DEdgeLen();
625            shader << "// OSX segfault work-around: Force a no-op sampling of the 3d lut.\n";
626            Write_sampleLut3D_rgb(shader, pixelName,
627                                  lut3dName, lut3DEdgeLen,
628                                  shaderDesc.getLanguage());
629        }
630#endif // __APPLE__
631        for(unsigned int i=0; i<m_gpuOpsHwPostProcess.size(); ++i)
632        {
633            m_gpuOpsHwPostProcess[i]->writeGpuShader(shader, pixelName, shaderDesc);
634        }
635        
636        WriteShaderFooter(shader, pixelName, shaderDesc);
637    }
638    
639}
640OCIO_NAMESPACE_EXIT