3 #ifndef RENDERER_STATE_OPENCL_H
4 #define RENDERER_STATE_OPENCL_H
14 #if ( SETTING_OPENCL != 0 )
19 #include "renderer_state_base.h"
// Constructs the OpenCL renderer state from a shared OpenCL::State handle.
// The out-of-class definition in this header stores the handle and creates
// the "TraceScreen" kernel.
51 inline State( std::shared_ptr< OpenCL::State >
const& pState );
// Declaration of TraceScreen, templated on an integer color format.
// Only part of the parameter list is visible in this view:
//   colors - destination byte buffer for the rendered colors
//   pitch  - passed as the host row pitch (in bytes) when the result is read back
// NOTE(review): the remaining parameters are not shown here - see the definition.
54 template<
int t_ColorFormat >
56 uint8_t*
const colors,
58 unsigned int const pitch,
// Shared handle to the OpenCL state; the definitions in this header query it
// for Context() and Queue().
68 std::shared_ptr< OpenCL::State > m_pState;
// Kernel object created in the constructor via clCreateKernel; owned by a
// unique_ptr whose deleter type is OpenCL::Deleter< _cl_kernel >.
69 std::unique_ptr< _cl_kernel, OpenCL::Deleter< _cl_kernel > > m_kernel;
// Constructor: keeps a copy of the shared OpenCL state, builds an
// OpenCL::Program from the listed source files and creates the "TraceScreen"
// kernel from the built program.
//   pState - shared OpenCL state, copied into m_pState
// NOTE(review): the failure branch after clCreateKernel is not visible in this
// view, so the error-reporting behaviour is not documented here.
80 State< PLATFORM_OPENCL >::State( std::shared_ptr< OpenCL::State >
const& pState ) : m_pState( pState ) {
82 cl_int result = CL_SUCCESS;
// NULL-terminated list of source file names handed to OpenCL::Program.
84 static char const*
const filenames[] = {
"settings.h",
"trace.cl", NULL };
// The third argument carries the build options; "build.log" is presumably
// the file that receives the build log - confirm against OpenCL::Program.
85 std::unique_ptr< OpenCL::Program > pProgram(
new OpenCL::Program( m_pState, filenames,
"-Werror -cl-nv-verbose -cl-no-signed-zeros -cl-fast-relaxed-math",
"build.log" ) );
// Look up the kernel entry point by name; `result` receives the OpenCL status.
87 m_kernel.reset( clCreateKernel( pProgram->BuiltProgram(),
"TraceScreen", &result ) );
88 if ( result != CL_SUCCESS )
// Runs the "TraceScreen" kernel for one frame and reads the color result back
// into host memory.
//
// Visible steps: allocate device buffers for colors and depths, convert the
// camera parameters to OpenCL vector types, bind the kernel arguments, enqueue
// a one-dimensional NDRange and perform a blocking rectangular read of the
// color buffer into `colors`.
//
//   colors - destination host buffer for the color rows
//   pitch  - host row pitch in bytes used for the read-back
// NOTE(review): the other parameters (dimension, fieldOfView, xx, yy, zz,
// position, ...) and the definitions of oclHeap, oclTextureHeap and oclRoot are
// on lines not visible in this view; the same holds for every error branch
// following an `if ( result != CL_SUCCESS )`.
93 template<
int t_ColorFormat >
94 void State< PLATFORM_OPENCL >::TraceScreen(
95 uint8_t*
const colors,
97 unsigned int const pitch,
// Twice the Minimum() of the component-wise quotient fieldOfView / dimension;
// presumably the smallest per-pixel angle - confirm against the vector type.
112 double const spread = 2 * ( fieldOfView / dimension ).Minimum();
114 cl_int result = CL_SUCCESS;
// Bytes per color row (dimension[ 0 ] cl_uint values), rounded up to a
// multiple of 64 (0x40) bytes.
116 cl_uint
const oclColorsPitch = ( dimension[ 0 ] *
sizeof( cl_uint ) + 0x3f ) & ~0x3f;
// Device color buffer: dimension[ 1 ] rows of oclColorsPitch bytes each.
117 std::unique_ptr< _cl_mem, OpenCL::Deleter< _cl_mem > > oclColors( clCreateBuffer( m_pState->Context(), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, dimension[ 1 ] * oclColorsPitch, NULL, &result ) );
118 if ( result != CL_SUCCESS )
// Bytes per depth row (dimension[ 0 ] cl_float values), rounded up to a
// multiple of 64 bytes in the same way.
121 cl_uint
const oclDepthsPitch = ( dimension[ 0 ] *
sizeof( cl_float ) + 0x3f ) & ~0x3f;
// Device depth buffer: dimension[ 1 ] rows of oclDepthsPitch bytes each.
// It is not read back in the lines visible here.
122 std::unique_ptr< _cl_mem, OpenCL::Deleter< _cl_mem > > oclDepths( clCreateBuffer( m_pState->Context(), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, dimension[ 1 ] * oclDepthsPitch, NULL, &result ) );
123 if ( result != CL_SUCCESS )
// Raw cl_mem handles are copied into locals because clSetKernelArg takes the
// address and size of each argument value.
126 { cl_mem
const oclColorsPointer = oclColors.get();
127 cl_mem
const oclDepthsPointer = oclDepths.get();
128 cl_uint2
const oclDimension = { { dimension[ 0 ], dimension[ 1 ] } };
// Double-precision host values are narrowed to cl_float for the kernel.
129 cl_float2
const oclFieldOfView = { {
static_cast< cl_float
>( fieldOfView[ 0 ] ), static_cast< cl_float >( fieldOfView[ 1 ] ) } };
130 cl_float
const oclSpread =
static_cast< cl_float
>( spread );
// xx, yy, zz and position are converted component-wise to cl_float3;
// presumably the camera basis vectors and camera position - TODO confirm.
134 cl_float3
const oclXX = { {
static_cast< cl_float
>( xx[ 0 ] ), static_cast< cl_float >( xx[ 1 ] ),
static_cast< cl_float
>( xx[ 2 ] ) } };
135 cl_float3
const oclYY = { {
static_cast< cl_float
>( yy[ 0 ] ), static_cast< cl_float >( yy[ 1 ] ),
static_cast< cl_float
>( yy[ 2 ] ) } };
136 cl_float3
const oclZZ = { {
static_cast< cl_float
>( zz[ 0 ] ), static_cast< cl_float >( zz[ 1 ] ),
static_cast< cl_float
>( zz[ 2 ] ) } };
137 cl_float3
const oclPP = { {
static_cast< cl_float
>( position[ 0 ] ), static_cast< cl_float >( position[ 1 ] ),
static_cast< cl_float
>( position[ 2 ] ) } };
// Arguments are bound positionally; the order below has to match the
// parameter order of the TraceScreen kernel (kernel source not visible here).
139 unsigned int index = 0;
140 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclColorsPointer ), &oclColorsPointer ) );
141 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclDepthsPointer ), &oclDepthsPointer ) );
142 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclDimension ), &oclDimension ) );
143 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclColorsPitch ), &oclColorsPitch ) );
144 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclDepthsPitch ), &oclDepthsPitch ) );
145 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclFieldOfView ), &oclFieldOfView ) );
146 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclSpread ), &oclSpread ) );
147 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclHeap ), &oclHeap ) );
148 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclTextureHeap ), &oclTextureHeap ) );
149 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclRoot ), &oclRoot ) );
150 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclXX ), &oclXX ) );
151 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclYY ), &oclYY ) );
152 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclZZ ), &oclZZ ) );
153 CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++,
sizeof( oclPP ), &oclPP ) );
// One-dimensional launch with a fixed work-group size of 320.
155 size_t const localWorkSize[] = { 320 };
// Global size: the pixel count dimension[ 0 ] * dimension[ 1 ] rounded up to a
// multiple of the work-group size, so the last group may extend past the final
// pixel.
156 size_t const workSize[] = { ( ( dimension[ 0 ] * dimension[ 1 ] + localWorkSize[ 0 ] - 1 ) / localWorkSize[ 0 ] ) * localWorkSize[ 0 ] };
157 result = clEnqueueNDRangeKernel( m_pState->Queue(), m_kernel.get(), 1, NULL, workSize, localWorkSize, 0, NULL, NULL );
158 if ( result != CL_SUCCESS )
// Region to copy: dimension[ 0 ] * sizeof( cl_uint ) bytes per row,
// dimension[ 1 ] rows, a single slice.
162 size_t const colorsSize[] = { dimension[ 0 ] *
sizeof( cl_uint ), dimension[ 1 ], 1 };
// The same zero origin is used for both the buffer side and the host side.
163 size_t const colorsOrigin[] = { 0, 0, 0 };
// Blocking read (CL_TRUE) of the color rectangle into `colors`, using `pitch`
// as the host row pitch.
// NOTE(review): the buffer row pitch passed here is colorsSize[ 0 ] (unpadded
// row bytes), while the buffer was allocated with - and the kernel is given -
// oclColorsPitch (rounded up to 64 bytes). The two differ whenever
// dimension[ 0 ] * sizeof( cl_uint ) is not a multiple of 64; confirm which
// stride the kernel actually writes with.
164 result = clEnqueueReadBufferRect( m_pState->Queue(), oclColors.get(), CL_TRUE, colorsOrigin, colorsOrigin, colorsSize, colorsSize[ 0 ], 0, pitch, 0, colors, 0, NULL, NULL );
165 if ( result != CL_SUCCESS )
177 #endif // SETTING_OPENCL
182 #endif // RENDERER_STATE_OPENCL_H