b3LauncherCL.h 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. #ifndef B3_LAUNCHER_CL_H
  2. #define B3_LAUNCHER_CL_H
  3. #include "b3BufferInfoCL.h"
  4. #include "Bullet3Common/b3MinMax.h"
  5. #include "b3OpenCLArray.h"
  6. #include <stdio.h>
  7. #define B3_DEBUG_SERIALIZE_CL
  8. #ifdef _WIN32
  9. #pragma warning(disable : 4996)
  10. #endif
  11. #define B3_CL_MAX_ARG_SIZE 16
  12. B3_ATTRIBUTE_ALIGNED16(struct)
  13. b3KernelArgData
  14. {
  15. int m_isBuffer;
  16. int m_argIndex;
  17. int m_argSizeInBytes;
  18. int m_unusedPadding;
  19. union {
  20. cl_mem m_clBuffer;
  21. unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
  22. };
  23. };
  24. class b3LauncherCL
  25. {
  26. cl_command_queue m_commandQueue;
  27. cl_kernel m_kernel;
  28. int m_idx;
  29. b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
  30. int m_serializationSizeInBytes;
  31. bool m_enableSerialization;
  32. const char* m_name;
  33. public:
  34. b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;
  35. b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
  36. virtual ~b3LauncherCL();
  37. void setBuffer(cl_mem clBuffer);
  38. void setBuffers(b3BufferInfoCL* buffInfo, int n);
  39. int getSerializationBufferSize() const
  40. {
  41. return m_serializationSizeInBytes;
  42. }
  43. int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
  44. inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
  45. int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
  46. int getNumArguments() const
  47. {
  48. return m_kernelArguments.size();
  49. }
  50. b3KernelArgData getArgument(int index)
  51. {
  52. return m_kernelArguments[index];
  53. }
  54. void serializeToFile(const char* fileName, int numWorkItems);
  55. template <typename T>
  56. inline void setConst(const T& consts)
  57. {
  58. int sz = sizeof(T);
  59. b3Assert(sz <= B3_CL_MAX_ARG_SIZE);
  60. if (m_enableSerialization)
  61. {
  62. b3KernelArgData kernelArg;
  63. kernelArg.m_argIndex = m_idx;
  64. kernelArg.m_isBuffer = 0;
  65. T* destArg = (T*)kernelArg.m_argData;
  66. *destArg = consts;
  67. kernelArg.m_argSizeInBytes = sizeof(T);
  68. m_kernelArguments.push_back(kernelArg);
  69. m_serializationSizeInBytes += sizeof(b3KernelArgData);
  70. }
  71. cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
  72. b3Assert(status == CL_SUCCESS);
  73. }
  74. inline void launch1D(int numThreads, int localSize = 64)
  75. {
  76. launch2D(numThreads, 1, localSize, 1);
  77. }
  78. inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
  79. {
  80. size_t gRange[3] = {1, 1, 1};
  81. size_t lRange[3] = {1, 1, 1};
  82. lRange[0] = localSizeX;
  83. lRange[1] = localSizeY;
  84. gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1));
  85. gRange[0] *= lRange[0];
  86. gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1));
  87. gRange[1] *= lRange[1];
  88. cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
  89. m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
  90. if (status != CL_SUCCESS)
  91. {
  92. printf("Error: OpenCL status = %d\n", status);
  93. }
  94. b3Assert(status == CL_SUCCESS);
  95. }
  96. void enableSerialization(bool serialize)
  97. {
  98. m_enableSerialization = serialize;
  99. }
  100. };
  101. #endif //B3_LAUNCHER_CL_H