fastconv.hpp | searchcode

/sourceryvsipl--/sourceryvsipl++-lite-2.1/src/src/vsip/opt/cuda/fastconv.hpp

https://github.com/somaproject/thirdparty-packages · C++ Header · 298 lines · 218 code · 49 blank · 31 comment · 22 complexity · 3195d25961d7a3f9054699a933b16df3 MD5 · raw file

/* Copyright (c) 2009 by CodeSourcery.  All rights reserved.

   This file is available for license from CodeSourcery, Inc. under the terms
   of a commercial license and under the GPL.  It is not part of the VSIPL++
   reference implementation and is not available under the BSD license.
*/
/** @file    vsip/opt/cuda/fastconv.hpp
    @author  Don McCoy
    @date    2009-03-22
    @brief   VSIPL++ Library: Wrapper for fast convolution using CUDA.
*/

#ifndef VSIP_OPT_CUDA_FASTCONV_HPP
#define VSIP_OPT_CUDA_FASTCONV_HPP

/***********************************************************************
  Included Files
***********************************************************************/

#include <vsip/core/allocation.hpp>
#include <vsip/core/config.hpp>
#include <vsip/core/extdata.hpp>
#include <vsip/opt/cuda/bindings.hpp>

/***********************************************************************
  Declarations
***********************************************************************/

namespace vsip
{
namespace impl
{
namespace cuda
{

/// Size-limit traits for the CUDA fast-convolution wrapper.
///
/// Only declared here: the limits (`min_size` / `max_size`) are supplied by
/// explicit specializations for each supported combination of
/// dimensionality D, value type T and complex format ComplexFmt.
/// A combination without a specialization is an incomplete type, so code
/// that reads its limits does not compile.
template <dimension_type D,
          typename T,
	  typename ComplexFmt> 
struct Fastconv_traits;
/// Size limits for D == 1 with std::complex<float> values in
/// Cmplx_inter_fmt format.
template <>
struct Fastconv_traits<1, std::complex<float>, Cmplx_inter_fmt>
{
  /// Smallest size accepted (inclusive) by Fastconv_base::rt_valid_size().
  static const length_type min_size = 16;

  /// Largest size accepted (inclusive) by Fastconv_base::rt_valid_size().
  static const length_type max_size = 8000000;
};
/// Size limits for D == 2 with std::complex<float> values in
/// Cmplx_inter_fmt format.
template <>
struct Fastconv_traits<2, std::complex<float>, Cmplx_inter_fmt>
{
  /// Smallest size accepted (inclusive) by Fastconv_base::rt_valid_size().
  static const length_type min_size = 16;

  /// Largest size accepted (inclusive) by Fastconv_base::rt_valid_size().
  static const length_type max_size = 8000000;
};


/// Fast convolution object (common base).
///
/// Stores the size and the transform_kernel flag, and routes each
/// convolve() overload to the private fconv() member, which is declared
/// here and defined elsewhere.  Every view is accessed through Ext_data
/// using a Stride_unit_dense layout before its raw pointer is handed over.
///
/// Template parameters:
///   D to specify the dimensionality of the kernel (either 1 or 2)
///   T to be the value type of data that will be processed.
///   ComplexFmt to be the complex format (either Cmplx_inter_fmt or
///     Cmplx_split_fmt) to be processed.
template <dimension_type D,
          typename       T,
          typename       ComplexFmt>
class Fastconv_base
{
  static dimension_type const dim = D;

  typedef ComplexFmt complex_type;
  typedef Layout<1, row1_type, Stride_unit_dense, complex_type> layout1_type;
  typedef Layout<2, row2_type, Stride_unit_dense, complex_type> layout2_type;

public:
  /// Store the construction parameters.
  ///
  /// Arguments:
  ///   input_size       - value later returned by size(); must satisfy
  ///                      rt_valid_size() (checked with assert).
  ///   transform_kernel - stored and forwarded unchanged to fconv() by
  ///                      every convolve() call.
  Fastconv_base(length_type const input_size, bool transform_kernel)
    : size_            (input_size),
      transform_kernel_(transform_kernel)
  {
    assert(rt_valid_size(this->size_));
  }

  /// Return true when `size` lies within the inclusive range
  /// [min_size, max_size] given by Fastconv_traits<dim, T, complex_type>.
  static bool rt_valid_size(length_type size)
  {
    return (size >= cuda::Fastconv_traits<dim, T, complex_type>::min_size &&
            size <= cuda::Fastconv_traits<dim, T, complex_type>::max_size);
  }


  /// Vector input, vector kernel, vector output (requires dim == 1).
  ///
  /// Calls fconv() with rows = 1 and length = out.size(0).
  template <typename Block0, typename Block1, typename Block2>
  void convolve(const_Vector<T, Block0> in, const_Vector<T, Block1> kernel, Vector<T, Block2> out)
  {
    // fconv() only receives bare pointers plus one length, so presumably
    // every buffer must hold at least that many elements.
    // NOTE(review): confirm against the fconv() definition.
    assert(in.size(0) >= out.size(0));
    assert(kernel.size(0) >= out.size(0));

    Ext_data<Block0, layout1_type> ext_in    (in.block(),     SYNC_IN);
    Ext_data<Block1, layout1_type> ext_kernel(kernel.block(), SYNC_IN);
    Ext_data<Block2, layout1_type> ext_out   (out.block(),    SYNC_OUT);
    assert(dim == 1);
    assert(ext_in.stride(0) == 1);
    assert(ext_kernel.stride(0) == 1);
    assert(ext_out.stride(0) == 1);

    length_type rows = 1;
    fconv(ext_in.data(), ext_kernel.data(), ext_out.data(), rows, out.size(0), transform_kernel_);
  }

  /// Matrix input, vector kernel, matrix output (requires dim == 1).
  ///
  /// Calls fconv() with rows = in.size(0) and length = out.size(1).
  template <typename Block0, typename Block1, typename Block2>
  void convolve(const_Matrix<T, Block0> in, const_Vector<T, Block1> kernel, Matrix<T, Block2> out)
  {
    // A single (rows, length) extent describes both matrices, which are
    // requested in Stride_unit_dense layout: the row lengths have to agree
    // and the output needs at least as many rows as the input.
    // NOTE(review): confirm against the fconv() definition.
    assert(in.size(1) == out.size(1));
    assert(out.size(0) >= in.size(0));
    assert(kernel.size(0) >= out.size(1));

    Ext_data<Block0, layout2_type> ext_in    (in.block(),     SYNC_IN);
    Ext_data<Block1, layout1_type> ext_kernel(kernel.block(), SYNC_IN);
    Ext_data<Block2, layout2_type> ext_out   (out.block(),    SYNC_OUT);
    assert(dim == 1);
    assert(ext_in.stride(1) == 1);
    assert(ext_kernel.stride(0) == 1);
    assert(ext_out.stride(1) == 1);

    length_type rows = in.size(0);
    fconv(ext_in.data(), ext_kernel.data(), ext_out.data(), rows, out.size(1), transform_kernel_);
  }

  /// Matrix input, matrix kernel, matrix output (requires dim == 2).
  ///
  /// Calls fconv() with rows = in.size(0) and length = out.size(1).
  template <typename Block0, typename Block1, typename Block2>
  void convolve(const_Matrix<T, Block0> in, const_Matrix<T, Block1> kernel, Matrix<T, Block2> out)
  {
    // Same reasoning as the vector-kernel overload; here the kernel is a
    // matrix too, so it must also cover the (rows, length) extent.
    // NOTE(review): confirm against the fconv() definition.
    assert(in.size(1) == out.size(1));
    assert(kernel.size(1) == out.size(1));
    assert(out.size(0) >= in.size(0));
    assert(kernel.size(0) >= in.size(0));

    Ext_data<Block0, layout2_type> ext_in    (in.block(),     SYNC_IN);
    Ext_data<Block1, layout2_type> ext_kernel(kernel.block(), SYNC_IN);
    Ext_data<Block2, layout2_type> ext_out   (out.block(),    SYNC_OUT);
    assert(dim == 2);
    assert(ext_in.stride(1) == 1);
    assert(ext_kernel.stride(1) == 1);
    assert(ext_out.stride(1) == 1);

    length_type rows = in.size(0);
    fconv(ext_in.data(), ext_kernel.data(), ext_out.data(), rows, out.size(1), transform_kernel_);
  }

  /// Size given at construction.  Const-qualified so it can also be queried
  /// through a const object or reference.
  length_type size() const { return size_; }

private:
  typedef typename Scalar_of<T>::type uT;

  /// Worker that receives the raw data pointers; defined outside this file.
  void fconv(T const* in, T const* kernel, T* out, 
             length_type rows, length_type length, bool transform_kernel);

  // Member data.
  length_type size_;
  bool transform_kernel_;
};



/// Fast convolution front end, selected by kernel dimensionality D.
///
/// Only declared here; this file provides partial specializations for
/// D == 1 and D == 2.  ComplexFmt defaults to Cmplx_inter_fmt when it is
/// not given explicitly.
template <dimension_type D,
          typename T,
	  typename ComplexFmt = Cmplx_inter_fmt>
class Fastconv;

/// Fast convolution with a one-dimensional (vector) kernel.
///
/// Keeps its own kernel vector of size() elements and offers call operators
/// for vector data and for matrix data whose rows have size() elements.
template <typename T, typename ComplexFmt>
class Fastconv<1, T, ComplexFmt> : public Fastconv_base<1, T, ComplexFmt>
{
private:
  // Storage types for the kernel copy held by this object.
  typedef ComplexFmt complex_type;
  typedef Layout<1, row1_type,
                 Stride_unit_dense, complex_type>   kernel_layout_type;
  typedef Fast_block<1, T,
                     kernel_layout_type, Local_map> kernel_block_type;
  typedef Vector<T, kernel_block_type>              kernel_view_type;

  // Member data.
  kernel_view_type kernel_;

  // Constructors, copies, assignments, and destructors.
public:

  /// Build the kernel from `coeffs`.
  ///
  /// Arguments:
  ///   coeffs           - kernel coefficients; must not have more than
  ///                      input_size elements (checked with assert).
  ///   input_size       - size passed to the base class and used as the
  ///                      length of the stored kernel.
  ///   transform_kernel - when true, the stored kernel is first set to T()
  ///                      and coeffs is then copied into the sub-view
  ///                      selected by view_domain(coeffs.local());
  ///                      when false, coeffs is assigned to it directly.
  template <typename Block>
  Fastconv(Vector<T, Block> coeffs,
           length_type input_size,
           bool transform_kernel = true)
    VSIP_THROW((std::bad_alloc))
    : Fastconv_base<1, T, ComplexFmt>(input_size, transform_kernel),
      kernel_(input_size)
  {
    assert(this->size() >= coeffs.size(0));

    if (!transform_kernel)
    {
      kernel_ = coeffs.local();
    }
    else
    {
      // Clear everything, then overwrite the part covered by coeffs.
      kernel_ = T();
      kernel_(view_domain(coeffs.local())) = coeffs.local();
    }
  }

  ~Fastconv() VSIP_NOTHROW {}

  // Fastconv operators.

  /// Vector form: `in` and `out` must both have size() elements.
  /// Returns `out` after handing the local views to convolve().
  template <typename Block1,
            typename Block2>
  Vector<T, Block2>
  operator()(
    const_Vector<T, Block1> in,
    Vector<T, Block2>       out)
      VSIP_NOTHROW
  {
    assert(this->size() == in.size());
    assert(this->size() == out.size());

    this->convolve(in.local(), this->kernel_, out.local());
    return out;
  }

  /// Matrix form: the rows of `in` and `out` must have size() elements.
  /// Returns `out` after handing the local views to convolve().
  template <typename Block1,
            typename Block2>
  Matrix<T, Block2>
  operator()(
    const_Matrix<T, Block1> in,
    Matrix<T, Block2>       out)
    VSIP_NOTHROW
  {
    assert(this->size() == in.size(1));
    assert(this->size() == out.size(1));

    this->convolve(in.local(), this->kernel_, out.local());
    return out;
  }
};



/// Fast convolution with a two-dimensional (matrix) kernel.
///
/// Keeps its own kernel matrix with as many rows as the coefficient matrix
/// and size() columns.
template <typename T, typename ComplexFmt>
class Fastconv<2, T, ComplexFmt> : public Fastconv_base<2, T, ComplexFmt>
{
private:
  // Storage types for the kernel copy held by this object.
  typedef ComplexFmt complex_type;
  typedef Layout<2, row2_type,
                 Stride_unit_dense, complex_type>   kernel_layout_type;
  typedef Fast_block<2, T,
                     kernel_layout_type, Local_map> kernel_block_type;
  typedef Matrix<T, kernel_block_type>              kernel_view_type;

  // Member data.
  kernel_view_type kernel_;

  // Constructors, copies, assignments, and destructors.
public:

  /// Build the kernel from `coeffs`.
  ///
  /// Arguments:
  ///   coeffs           - kernel coefficients; must not have more than
  ///                      input_size columns (checked with assert).
  ///   input_size       - size passed to the base class and used as the
  ///                      column count of the stored kernel.
  ///   transform_kernel - when true, the stored kernel is first set to T()
  ///                      and coeffs is then copied into the sub-view
  ///                      selected by view_domain(coeffs.local());
  ///                      when false, coeffs is assigned to it directly.
  template <typename Block>
  Fastconv(Matrix<T, Block> coeffs,
           length_type input_size,
           bool transform_kernel = true)
    VSIP_THROW((std::bad_alloc))
    : Fastconv_base<2, T, ComplexFmt>(input_size, transform_kernel),
      kernel_(coeffs.local().size(0), input_size)
  {
    assert(this->size() >= coeffs.size(1));

    if (!transform_kernel)
    {
      kernel_ = coeffs.local();
    }
    else
    {
      // Clear everything, then overwrite the part covered by coeffs.
      kernel_ = T();
      kernel_(view_domain(coeffs.local())) = coeffs.local();
    }
  }

  ~Fastconv() VSIP_NOTHROW {}

  // Fastconv operators.

  /// Vector form: `in` and `out` must both have size() elements.
  ///
  /// NOTE(review): kernel_ is a Matrix here, while the convolve() overloads
  /// of Fastconv_base visible in this file pair vector data with a vector
  /// kernel only.  This call looks like it cannot resolve if the operator
  /// is ever instantiated -- confirm whether it is meant to be usable.
  template <typename Block1,
            typename Block2>
  Vector<T, Block2>
  operator()(
    const_Vector<T, Block1> in,
    Vector<T, Block2>       out)
      VSIP_NOTHROW
  {
    assert(this->size() == in.size());
    assert(this->size() == out.size());

    this->convolve(in.local(), this->kernel_, out.local());
    return out;
  }

  /// Matrix form: the rows of `in` and `out` must have size() elements.
  /// Returns `out` after handing the local views and the kernel matrix to
  /// convolve().
  template <typename Block1,
            typename Block2>
  Matrix<T, Block2>
  operator()(
    const_Matrix<T, Block1> in,
    Matrix<T, Block2>       out)
    VSIP_NOTHROW
  {
    assert(this->size() == in.size(1));
    assert(this->size() == out.size(1));

    this->convolve(in.local(), this->kernel_, out.local());
    return out;
  }
};


} // namespace vsip::impl::cuda
} // namespace vsip::impl
} // namespace vsip

#endif // VSIP_OPT_CUDA_FASTCONV_HPP
Alerts (12)

Complexity hotspot; lines 93 to 96 (total complexity: 4)
93 94 95 96
Complexity hotspot; lines 108 to 111 (total complexity: 4)
108 109 110 111
Complexity hotspot; lines 123 to 126 (total complexity: 4)
123 124 125 126