Voxel
 All Classes Namespaces Files Functions Typedefs Enumerations Enumerator Macros Pages
renderer_state_opencl.h
1 #pragma once
2 
3 #ifndef RENDERER_STATE_OPENCL_H
4 #define RENDERER_STATE_OPENCL_H
5 
6 
7 
8 
9 #include "settings.h"
10 
11 
12 
13 
14 #if ( SETTING_OPENCL != 0 )
15 
16 
17 
18 
19 #include "renderer_state_base.h"
20 #include "renderer_camera.h"
21 #include "synchronized.h"
22 #include "platform.h"
23 #include "octree.h"
24 #include "opencl.h"
25 #include "vector.h"
26 #include "helpers.h"
27 #include "settings.h"
28 
29 #include <CL/cl.h>
30 
31 #include <array>
32 
33 #include <stdint.h>
34 
35 
36 
37 
38 namespace Renderer {
39 
40 
41 
42 
43 /*==============================================================================
44  State< PLATFORM_OPENCL > specialization
45 ==============================================================================*/
46 
47 
48 template<>
49 struct State< PLATFORM_OPENCL > {
50 
51  inline State( std::shared_ptr< OpenCL::State > const& pState );
52 
53 
54  template< int t_ColorFormat >
55  void TraceScreen(
56  uint8_t* const colors,
57  Vector< unsigned int, 2 > const dimension, // width, height
58  unsigned int const pitch, // bytes per row
59  Synchronized::Heap< PLATFORM_OPENCL > const& heap, // heap containing the octree
60  Synchronized::TextureHeap< t_ColorFormat, PLATFORM_OPENCL > const& textureHeap, // texture heap containing the octree's textures
61  Synchronized::Heap< PLATFORM_OPENCL >::Pointer< Octree::Node< t_ColorFormat, PLATFORM_OPENCL > > const& pRoot, // root of the octree
62  Camera const& camera // camera matrix (x,y,z vectors, and position)
63  ) const;
64 
65 
66 private:
67 
68  std::shared_ptr< OpenCL::State > m_pState;
69  std::unique_ptr< _cl_kernel, OpenCL::Deleter< _cl_kernel > > m_kernel;
70 };
71 
72 
73 
74 
75 /*==============================================================================
76  State< PLATFORM_OPENCL > methods
77 ==============================================================================*/
78 
79 
80 State< PLATFORM_OPENCL >::State( std::shared_ptr< OpenCL::State > const& pState ) : m_pState( pState ) {
81 
82  cl_int result = CL_SUCCESS;
83 
84  static char const* const filenames[] = { "settings.h", "trace.cl", NULL };
85  std::unique_ptr< OpenCL::Program > pProgram( new OpenCL::Program( m_pState, filenames, "-Werror -cl-nv-verbose -cl-no-signed-zeros -cl-fast-relaxed-math", "build.log" ) );
86 
87  m_kernel.reset( clCreateKernel( pProgram->BuiltProgram(), "TraceScreen", &result ) );
88  if ( result != CL_SUCCESS )
89  THROW_OPENCL_EXCEPTION( "clCreateKernel failed!", result );
90 }
91 
92 
93 template< int t_ColorFormat >
94 void State< PLATFORM_OPENCL >::TraceScreen(
95  uint8_t* const colors,
96  Vector< unsigned int, 2 > const dimension,
97  unsigned int const pitch,
100  Synchronized::Heap< PLATFORM_OPENCL >::Pointer< Octree::Node< t_ColorFormat, PLATFORM_OPENCL > > const& pRoot,
101  Camera const& camera
102 ) const
103 {
104  // **TODO: store colors in a 2d image object. this should save us the effort of sending dimension and pitch to the OpenCL kernel.
105 
106  Vector< double, 3 > const& xx = camera.VectorX();
107  Vector< double, 3 > const& yy = camera.VectorY();
108  Vector< double, 3 > const& zz = camera.VectorZ();
109  Vector< double, 3 > const& position = camera.Position();
110  Vector< double, 2 > const fieldOfView = dimension * ( camera.Zoom() / dimension.Maximum() );
111 
112  double const spread = 2 * ( fieldOfView / dimension ).Minimum(); // x2 because for fov=1, the screen is 2 units wide for every 1 unit of distance
113 
114  cl_int result = CL_SUCCESS;
115 
116  cl_uint const oclColorsPitch = ( dimension[ 0 ] * sizeof( cl_uint ) + 0x3f ) & ~0x3f;
117  std::unique_ptr< _cl_mem, OpenCL::Deleter< _cl_mem > > oclColors( clCreateBuffer( m_pState->Context(), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, dimension[ 1 ] * oclColorsPitch, NULL, &result ) );
118  if ( result != CL_SUCCESS )
119  THROW_OPENCL_EXCEPTION( "clCreateBuffer failed!", result );
120 
121  cl_uint const oclDepthsPitch = ( dimension[ 0 ] * sizeof( cl_float ) + 0x3f ) & ~0x3f;
122  std::unique_ptr< _cl_mem, OpenCL::Deleter< _cl_mem > > oclDepths( clCreateBuffer( m_pState->Context(), CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, dimension[ 1 ] * oclDepthsPitch, NULL, &result ) );
123  if ( result != CL_SUCCESS )
124  THROW_OPENCL_EXCEPTION( "clCreateBuffer failed!", result );
125 
126  { cl_mem const oclColorsPointer = oclColors.get();
127  cl_mem const oclDepthsPointer = oclDepths.get();
128  cl_uint2 const oclDimension = { { dimension[ 0 ], dimension[ 1 ] } };
129  cl_float2 const oclFieldOfView = { { static_cast< cl_float >( fieldOfView[ 0 ] ), static_cast< cl_float >( fieldOfView[ 1 ] ) } };
130  cl_float const oclSpread = static_cast< cl_float >( spread );
131  cl_mem const oclHeap = OpenCL::Convert( heap );
132  cl_mem const oclTextureHeap = OpenCL::Convert( textureHeap );
133  cl_uint const oclRoot = OpenCL::Convert( pRoot );
134  cl_float3 const oclXX = { { static_cast< cl_float >( xx[ 0 ] ), static_cast< cl_float >( xx[ 1 ] ), static_cast< cl_float >( xx[ 2 ] ) } };
135  cl_float3 const oclYY = { { static_cast< cl_float >( yy[ 0 ] ), static_cast< cl_float >( yy[ 1 ] ), static_cast< cl_float >( yy[ 2 ] ) } };
136  cl_float3 const oclZZ = { { static_cast< cl_float >( zz[ 0 ] ), static_cast< cl_float >( zz[ 1 ] ), static_cast< cl_float >( zz[ 2 ] ) } };
137  cl_float3 const oclPP = { { static_cast< cl_float >( position[ 0 ] ), static_cast< cl_float >( position[ 1 ] ), static_cast< cl_float >( position[ 2 ] ) } };
138 
139  unsigned int index = 0;
140  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclColorsPointer ), &oclColorsPointer ) );
141  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclDepthsPointer ), &oclDepthsPointer ) );
142  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclDimension ), &oclDimension ) );
143  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclColorsPitch ), &oclColorsPitch ) );
144  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclDepthsPitch ), &oclDepthsPitch ) );
145  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclFieldOfView ), &oclFieldOfView ) );
146  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclSpread ), &oclSpread ) );
147  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclHeap ), &oclHeap ) );
148  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclTextureHeap ), &oclTextureHeap ) );
149  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclRoot ), &oclRoot ) );
150  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclXX ), &oclXX ) );
151  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclYY ), &oclYY ) );
152  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclZZ ), &oclZZ ) );
153  CHECK_OPENCL( clSetKernelArg( m_kernel.get(), index++, sizeof( oclPP ), &oclPP ) );
154 
155  size_t const localWorkSize[] = { 320 };
156  size_t const workSize[] = { ( ( dimension[ 0 ] * dimension[ 1 ] + localWorkSize[ 0 ] - 1 ) / localWorkSize[ 0 ] ) * localWorkSize[ 0 ] };
157  result = clEnqueueNDRangeKernel( m_pState->Queue(), m_kernel.get(), 1, NULL, workSize, localWorkSize, 0, NULL, NULL );
158  if ( result != CL_SUCCESS )
159  THROW_OPENCL_EXCEPTION( "clEnqueueNDRangeKernel failed!", result );
160  }
161 
162  size_t const colorsSize[] = { dimension[ 0 ] * sizeof( cl_uint ), dimension[ 1 ], 1 };
163  size_t const colorsOrigin[] = { 0, 0, 0 };
164  result = clEnqueueReadBufferRect( m_pState->Queue(), oclColors.get(), CL_TRUE, colorsOrigin, colorsOrigin, colorsSize, colorsSize[ 0 ], 0, pitch, 0, colors, 0, NULL, NULL );
165  if ( result != CL_SUCCESS )
166  THROW_OPENCL_EXCEPTION( "clEnqueueReadBuffer failed!", result );
167 }
168 
169 
170 
171 
172 } // namespace Renderer
173 
174 
175 
176 
177 #endif // SETTING_OPENCL
178 
179 
180 
181 
182 #endif // RENDERER_STATE_OPENCL_H