b3OpenCLUtils.cpp 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. /*
  2. Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org
  3. Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc.
  4. This software is provided 'as-is', without any express or implied warranty.
  5. In no event will the authors be held liable for any damages arising from the use of this software.
  6. Permission is granted to anyone to use this software for any purpose,
  7. including commercial applications, and to alter it and redistribute it freely,
  8. subject to the following restrictions:
  9. 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
  10. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
  11. 3. This notice may not be removed or altered from any source distribution.
  12. */
  13. //Original author: Roman Ponomarev
  14. //Mostly Reimplemented by Erwin Coumans
  // Debug switch: when set, the cached-binary lookup is bypassed and the kernel
  // source is read from the .cl file even if an embedded source string was given.
  bool gDebugForceLoadingFromSource = false;
  // Debug switch: when set, the cached-binary lookup is bypassed.
  bool gDebugSkipLoadingBinary = false;
  17. #include "Bullet3Common/b3Logging.h"
  18. #include <string.h>
  19. #ifdef _WIN32
  20. #pragma warning(disable : 4996)
  21. #endif
  22. #include "b3OpenCLUtils.h"
  23. //#include "b3OpenCLInclude.h"
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #define B3_MAX_CL_DEVICES 16 //who needs 16 devices?
  27. #ifdef _WIN32
  28. #include <windows.h>
  29. #endif
  30. #include <assert.h>
  31. #define b3Assert assert
  32. #ifndef _WIN32
  33. #include <sys/stat.h>
  34. #endif
  // Directory used for cached program binaries; it can be replaced through
  // b3OpenCLUtils_setCachePath().
  static const char* sCachedBinaryPath = "cache";

  // Pick the preferred platform vendor according to the OpenCL SDK selected at
  // build time; the first matching CL_PLATFORM_* / B3_USE_CLEW define wins.
  #if defined(CL_PLATFORM_MINI_CL)
  #define B3_PREFERRED_PLATFORM_VENDOR "MiniCL, SCEA"
  #elif defined(CL_PLATFORM_AMD)
  #define B3_PREFERRED_PLATFORM_VENDOR "Advanced Micro Devices, Inc."
  #elif defined(CL_PLATFORM_NVIDIA)
  #define B3_PREFERRED_PLATFORM_VENDOR "NVIDIA Corporation"
  #elif defined(CL_PLATFORM_INTEL)
  #define B3_PREFERRED_PLATFORM_VENDOR "Intel(R) Corporation"
  #elif defined(B3_USE_CLEW)
  #define B3_PREFERRED_PLATFORM_VENDOR "clew (OpenCL Extension Wrangler library)"
  #else
  #define B3_PREFERRED_PLATFORM_VENDOR "Unknown Vendor"
  #endif
  static const char* spPlatformVendor = B3_PREFERRED_PLATFORM_VENDOR;
  51. #ifndef CL_PLATFORM_MINI_CL
  52. #ifdef _WIN32
  53. #ifndef B3_USE_CLEW
  54. #include "CL/cl_gl.h"
  55. #endif //B3_USE_CLEW
  56. #endif //_WIN32
  57. #endif
  58. void MyFatalBreakAPPLE(const char* errstr,
  59. const void* private_info,
  60. size_t cb,
  61. void* user_data)
  62. {
  63. const char* patloc = strstr(errstr, "Warning");
  64. //find out if it is a warning or error, exit if error
  65. if (patloc)
  66. {
  67. b3Warning("Warning: %s\n", errstr);
  68. }
  69. else
  70. {
  71. b3Error("Error: %s\n", errstr);
  72. b3Assert(0);
  73. }
  74. }
  75. #ifdef B3_USE_CLEW
  76. int b3OpenCLUtils_clewInit()
  77. {
  78. int result = -1;
  79. #ifdef _WIN32
  80. const char* cl = "OpenCL.dll";
  81. #elif defined __APPLE__
  82. const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
  83. #else //presumable Linux? \
  84. //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so
  85. const char* cl = "libOpenCL.so.1";
  86. result = clewInit(cl);
  87. if (result != CLEW_SUCCESS)
  88. {
  89. cl = "libOpenCL.so";
  90. }
  91. else
  92. {
  93. clewExit();
  94. }
  95. #endif
  96. result = clewInit(cl);
  97. if (result != CLEW_SUCCESS)
  98. {
  99. b3Error("clewInit failed with error code %d\n", result);
  100. }
  101. else
  102. {
  103. b3Printf("clewInit succesfull using %s\n", cl);
  104. }
  105. return result;
  106. }
  107. #endif
  108. int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum)
  109. {
  110. #ifdef B3_USE_CLEW
  111. b3OpenCLUtils_clewInit();
  112. #endif
  113. cl_platform_id pPlatforms[10] = {0};
  114. cl_uint numPlatforms = 0;
  115. cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms);
  116. //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
  117. if (ciErrNum != CL_SUCCESS)
  118. {
  119. if (pErrNum != NULL)
  120. *pErrNum = ciErrNum;
  121. }
  122. return numPlatforms;
  123. }
  124. const char* b3OpenCLUtils_getSdkVendorName()
  125. {
  126. return spPlatformVendor;
  127. }
  128. void b3OpenCLUtils_setCachePath(const char* path)
  129. {
  130. sCachedBinaryPath = path;
  131. }
  132. cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
  133. {
  134. #ifdef B3_USE_CLEW
  135. b3OpenCLUtils_clewInit();
  136. #endif
  137. cl_platform_id platform = 0;
  138. unsigned int platformIndex = (unsigned int)platformIndex0;
  139. cl_uint numPlatforms;
  140. cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
  141. if (platformIndex < numPlatforms)
  142. {
  143. cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
  144. ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
  145. if (ciErrNum != CL_SUCCESS)
  146. {
  147. if (pErrNum != NULL)
  148. *pErrNum = ciErrNum;
  149. return platform;
  150. }
  151. platform = platforms[platformIndex];
  152. free(platforms);
  153. }
  154. return platform;
  155. }
  156. void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo)
  157. {
  158. b3Assert(platform);
  159. cl_int ciErrNum;
  160. ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL);
  161. oclCHECKERROR(ciErrNum, CL_SUCCESS);
  162. ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL);
  163. oclCHECKERROR(ciErrNum, CL_SUCCESS);
  164. ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL);
  165. oclCHECKERROR(ciErrNum, CL_SUCCESS);
  166. }
  167. void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform)
  168. {
  169. b3OpenCLPlatformInfo platformInfo;
  170. b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
  171. b3Printf("Platform info:\n");
  172. b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor);
  173. b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName);
  174. b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion);
  175. }
  176. cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex)
  177. {
  178. cl_context retContext = 0;
  179. cl_int ciErrNum = 0;
  180. cl_uint num_entries;
  181. cl_device_id devices[B3_MAX_CL_DEVICES];
  182. cl_uint num_devices;
  183. cl_context_properties* cprops;
  184. /*
  185. * If we could find our platform, use it. Otherwise pass a NULL and get whatever the
  186. * implementation thinks we should be using.
  187. */
  188. cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0};
  189. cps[0] = CL_CONTEXT_PLATFORM;
  190. cps[1] = (cl_context_properties)platform;
  191. #ifdef _WIN32
  192. #ifndef B3_USE_CLEW
  193. if (pGLContext && pGLDC)
  194. {
  195. cps[2] = CL_GL_CONTEXT_KHR;
  196. cps[3] = (cl_context_properties)pGLContext;
  197. cps[4] = CL_WGL_HDC_KHR;
  198. cps[5] = (cl_context_properties)pGLDC;
  199. }
  200. #endif //B3_USE_CLEW
  201. #endif //_WIN32
  202. num_entries = B3_MAX_CL_DEVICES;
  203. num_devices = -1;
  204. ciErrNum = clGetDeviceIDs(
  205. platform,
  206. deviceType,
  207. num_entries,
  208. devices,
  209. &num_devices);
  210. if (ciErrNum < 0)
  211. {
  212. b3Printf("clGetDeviceIDs returned %d\n", ciErrNum);
  213. return 0;
  214. }
  215. cprops = (NULL == platform) ? NULL : cps;
  216. if (!num_devices)
  217. return 0;
  218. if (pGLContext)
  219. {
  220. //search for the GPU that relates to the OpenCL context
  221. unsigned int i;
  222. for (i = 0; i < num_devices; i++)
  223. {
  224. retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum);
  225. if (ciErrNum == CL_SUCCESS)
  226. break;
  227. }
  228. }
  229. else
  230. {
  231. if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices)
  232. {
  233. //create a context of the preferred device index
  234. retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum);
  235. }
  236. else
  237. {
  238. //create a context of all devices
  239. #if defined(__APPLE__)
  240. retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum);
  241. #else
  242. b3Printf("numDevices=%d\n", num_devices);
  243. retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum);
  244. #endif
  245. }
  246. }
  247. if (pErrNum != NULL)
  248. {
  249. *pErrNum = ciErrNum;
  250. };
  251. return retContext;
  252. }
  253. cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId)
  254. {
  255. #ifdef B3_USE_CLEW
  256. b3OpenCLUtils_clewInit();
  257. #endif
  258. cl_uint numPlatforms;
  259. cl_context retContext = 0;
  260. unsigned int i;
  261. cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms);
  262. if (ciErrNum != CL_SUCCESS)
  263. {
  264. if (pErrNum != NULL) *pErrNum = ciErrNum;
  265. return NULL;
  266. }
  267. if (numPlatforms > 0)
  268. {
  269. cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms);
  270. ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
  271. if (ciErrNum != CL_SUCCESS)
  272. {
  273. if (pErrNum != NULL)
  274. *pErrNum = ciErrNum;
  275. free(platforms);
  276. return NULL;
  277. }
  278. for (i = 0; i < numPlatforms; ++i)
  279. {
  280. char pbuf[128];
  281. ciErrNum = clGetPlatformInfo(platforms[i],
  282. CL_PLATFORM_VENDOR,
  283. sizeof(pbuf),
  284. pbuf,
  285. NULL);
  286. if (ciErrNum != CL_SUCCESS)
  287. {
  288. if (pErrNum != NULL) *pErrNum = ciErrNum;
  289. return NULL;
  290. }
  291. if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex)
  292. {
  293. cl_platform_id tmpPlatform = platforms[0];
  294. platforms[0] = platforms[i];
  295. platforms[i] = tmpPlatform;
  296. break;
  297. }
  298. else
  299. {
  300. if (!strcmp(pbuf, spPlatformVendor))
  301. {
  302. cl_platform_id tmpPlatform = platforms[0];
  303. platforms[0] = platforms[i];
  304. platforms[i] = tmpPlatform;
  305. }
  306. }
  307. }
  308. for (i = 0; i < numPlatforms; ++i)
  309. {
  310. cl_platform_id platform = platforms[i];
  311. assert(platform);
  312. retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex);
  313. if (retContext)
  314. {
  315. // printf("OpenCL platform details:\n");
  316. b3OpenCLPlatformInfo platformInfo;
  317. b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
  318. if (retPlatformId)
  319. *retPlatformId = platform;
  320. break;
  321. }
  322. }
  323. free(platforms);
  324. }
  325. return retContext;
  326. }
  327. //////////////////////////////////////////////////////////////////////////////
  328. //! Gets the id of the nth device from the context
  329. //!
  330. //! @return the id or -1 when out of range
  331. //! @param cxMainContext OpenCL context
  332. //! @param device_idx index of the device of interest
  333. //////////////////////////////////////////////////////////////////////////////
  334. cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex)
  335. {
  336. assert(cxMainContext);
  337. size_t szParmDataBytes;
  338. cl_device_id* cdDevices;
  339. cl_device_id device;
  340. // get the list of devices associated with context
  341. clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
  342. if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex)
  343. {
  344. return (cl_device_id)-1;
  345. }
  346. cdDevices = (cl_device_id*)malloc(szParmDataBytes);
  347. clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);
  348. device = cdDevices[deviceIndex];
  349. free(cdDevices);
  350. return device;
  351. }
  352. int b3OpenCLUtils_getNumDevices(cl_context cxMainContext)
  353. {
  354. size_t szParamDataBytes;
  355. int device_count;
  356. clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes);
  357. device_count = (int)szParamDataBytes / sizeof(cl_device_id);
  358. return device_count;
  359. }
  360. void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info)
  361. {
  362. // CL_DEVICE_NAME
  363. clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
  364. // CL_DEVICE_VENDOR
  365. clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL);
  366. // CL_DRIVER_VERSION
  367. clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL);
  368. // CL_DEVICE_INFO
  369. clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL);
  370. // CL_DEVICE_MAX_COMPUTE_UNITS
  371. clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL);
  372. // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
  373. clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL);
  374. // CL_DEVICE_MAX_WORK_ITEM_SIZES
  375. clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL);
  376. // CL_DEVICE_MAX_WORK_GROUP_SIZE
  377. clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL);
  378. // CL_DEVICE_MAX_CLOCK_FREQUENCY
  379. clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL);
  380. // CL_DEVICE_ADDRESS_BITS
  381. clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL);
  382. // CL_DEVICE_MAX_MEM_ALLOC_SIZE
  383. clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL);
  384. // CL_DEVICE_GLOBAL_MEM_SIZE
  385. clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL);
  386. // CL_DEVICE_ERROR_CORRECTION_SUPPORT
  387. clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL);
  388. // CL_DEVICE_LOCAL_MEM_TYPE
  389. clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL);
  390. // CL_DEVICE_LOCAL_MEM_SIZE
  391. clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL);
  392. // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
  393. clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL);
  394. // CL_DEVICE_QUEUE_PROPERTIES
  395. clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL);
  396. // CL_DEVICE_IMAGE_SUPPORT
  397. clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL);
  398. // CL_DEVICE_MAX_READ_IMAGE_ARGS
  399. clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL);
  400. // CL_DEVICE_MAX_WRITE_IMAGE_ARGS
  401. clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL);
  402. // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH
  403. clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL);
  404. clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL);
  405. clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL);
  406. clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL);
  407. clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL);
  408. // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines
  409. clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL);
  410. // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type>
  411. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL);
  412. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL);
  413. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL);
  414. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL);
  415. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL);
  416. clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL);
  417. }
  418. void b3OpenCLUtils_printDeviceInfo(cl_device_id device)
  419. {
  420. b3OpenCLDeviceInfo info;
  421. b3OpenCLUtils::getDeviceInfo(device, &info);
  422. b3Printf("Device Info:\n");
  423. b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
  424. b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
  425. b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion);
  426. if (info.m_deviceType & CL_DEVICE_TYPE_CPU)
  427. b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU");
  428. if (info.m_deviceType & CL_DEVICE_TYPE_GPU)
  429. b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU");
  430. if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR)
  431. b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR");
  432. if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT)
  433. b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT");
  434. b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits);
  435. b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims);
  436. b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]);
  437. b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize);
  438. b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency);
  439. b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits);
  440. b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024)));
  441. b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024)));
  442. b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no");
  443. b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global");
  444. b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024));
  445. b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024));
  446. if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
  447. b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE");
  448. if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE)
  449. b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE");
  450. b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport);
  451. b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs);
  452. b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs);
  453. b3Printf("\n CL_DEVICE_IMAGE <dim>");
  454. b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth);
  455. b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight);
  456. b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth);
  457. b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight);
  458. b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth);
  459. if (*info.m_deviceExtensions != 0)
  460. {
  461. b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions);
  462. }
  463. else
  464. {
  465. b3Printf(" CL_DEVICE_EXTENSIONS: None\n");
  466. }
  467. b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t");
  468. b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n",
  469. info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble);
  470. }
  // Returns a pointer into name just past the last occurrence of pattern, or
  // name itself when pattern does not occur. Used to strip leading directory
  // components, e.g. strip2("a/b/c.cl", "/") yields "c.cl".
  // An empty pattern leaves name unchanged.
  static const char* strip2(const char* name, const char* pattern)
  {
      size_t const patlen = strlen(pattern);
      const char* oriptr = name;
      const char* patloc;

      // strstr() matches an empty pattern at every position without advancing,
      // so the scan below would never terminate.
      if (patlen == 0)
      {
          return name;
      }

      // Hop over each occurrence; the cursor ends up just after the last one.
      while ((patloc = strstr(oriptr, pattern)) != NULL)
      {
          oriptr = patloc + patlen;
      }
      return oriptr;
  }
  484. cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching)
  485. {
  486. const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : "";
  487. if (disableBinaryCaching)
  488. {
  489. //kernelSourceOrg = 0;
  490. }
  491. cl_program m_cpProgram = 0;
  492. cl_int status;
  493. char binaryFileName[B3_MAX_STRING_LENGTH];
  494. char deviceName[256];
  495. char driverVersion[256];
  496. const char* strippedName;
  497. int fileUpToDate = 0;
  498. #ifdef _WIN32
  499. int binaryFileValid = 0;
  500. #endif
  501. if (!disableBinaryCaching && clFileNameForCaching)
  502. {
  503. clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL);
  504. clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL);
  505. strippedName = strip2(clFileNameForCaching, "\\");
  506. strippedName = strip2(strippedName, "/");
  507. #ifdef _MSC_VER
  508. sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
  509. #else
  510. sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion);
  511. #endif
  512. }
  513. if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource))
  514. {
  515. #ifdef _WIN32
  516. char* bla = 0;
  517. //printf("searching for %s\n", binaryFileName);
  518. FILETIME modtimeBinary;
  519. CreateDirectoryA(sCachedBinaryPath, 0);
  520. {
  521. HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
  522. if (binaryFileHandle == INVALID_HANDLE_VALUE)
  523. {
  524. DWORD errorCode;
  525. errorCode = GetLastError();
  526. switch (errorCode)
  527. {
  528. case ERROR_FILE_NOT_FOUND:
  529. {
  530. b3Warning("\nCached file not found %s\n", binaryFileName);
  531. break;
  532. }
  533. case ERROR_PATH_NOT_FOUND:
  534. {
  535. b3Warning("\nCached file path not found %s\n", binaryFileName);
  536. break;
  537. }
  538. default:
  539. {
  540. b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode);
  541. }
  542. }
  543. }
  544. else
  545. {
  546. if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0)
  547. {
  548. DWORD errorCode;
  549. errorCode = GetLastError();
  550. b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
  551. }
  552. else
  553. {
  554. binaryFileValid = 1;
  555. }
  556. CloseHandle(binaryFileHandle);
  557. }
  558. if (binaryFileValid)
  559. {
  560. HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
  561. if (srcFileHandle == INVALID_HANDLE_VALUE)
  562. {
  563. const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"};
  564. for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++)
  565. {
  566. char relativeFileName[1024];
  567. sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
  568. srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
  569. }
  570. }
  571. if (srcFileHandle != INVALID_HANDLE_VALUE)
  572. {
  573. FILETIME modtimeSrc;
  574. if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0)
  575. {
  576. DWORD errorCode;
  577. errorCode = GetLastError();
  578. b3Warning("\nGetFileTime errorCode = %d\n", errorCode);
  579. }
  580. if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime)))
  581. {
  582. fileUpToDate = 1;
  583. }
  584. else
  585. {
  586. b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName);
  587. }
  588. CloseHandle(srcFileHandle);
  589. }
  590. else
  591. {
  592. #ifdef _DEBUG
  593. DWORD errorCode;
  594. errorCode = GetLastError();
  595. switch (errorCode)
  596. {
  597. case ERROR_FILE_NOT_FOUND:
  598. {
  599. b3Warning("\nSrc file not found %s\n", clFileNameForCaching);
  600. break;
  601. }
  602. case ERROR_PATH_NOT_FOUND:
  603. {
  604. b3Warning("\nSrc path not found %s\n", clFileNameForCaching);
  605. break;
  606. }
  607. default:
  608. {
  609. b3Warning("\nnSrc file reading errorCode = %d\n", errorCode);
  610. }
  611. }
  612. //we should make sure the src file exists so we can verify the timestamp with binary
  613. // assert(0);
  614. b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName);
  615. fileUpToDate = true;
  616. #else
  617. //if we cannot find the source, assume it is OK in release builds
  618. fileUpToDate = true;
  619. #endif
  620. }
  621. }
  622. }
  623. #else
  624. fileUpToDate = true;
  625. if (mkdir(sCachedBinaryPath, 0777) == -1)
  626. {
  627. }
  628. else
  629. {
  630. b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath);
  631. }
  632. #endif //_WIN32
  633. }
  634. if (fileUpToDate)
  635. {
  636. #ifdef _MSC_VER
  637. FILE* file;
  638. if (fopen_s(&file, binaryFileName, "rb") != 0)
  639. file = 0;
  640. #else
  641. FILE* file = fopen(binaryFileName, "rb");
  642. #endif
  643. if (file)
  644. {
  645. size_t binarySize = 0;
  646. char* binary = 0;
  647. fseek(file, 0L, SEEK_END);
  648. binarySize = ftell(file);
  649. rewind(file);
  650. binary = (char*)malloc(sizeof(char) * binarySize);
  651. int bytesRead;
  652. bytesRead = fread(binary, sizeof(char), binarySize, file);
  653. fclose(file);
  654. m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status);
  655. b3Assert(status == CL_SUCCESS);
  656. status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0);
  657. b3Assert(status == CL_SUCCESS);
  658. if (status != CL_SUCCESS)
  659. {
  660. char* build_log;
  661. size_t ret_val_size;
  662. clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  663. build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
  664. clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  665. build_log[ret_val_size] = '\0';
  666. b3Error("%s\n", build_log);
  667. free(build_log);
  668. b3Assert(0);
  669. m_cpProgram = 0;
  670. b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName);
  671. }
  672. else
  673. {
  674. b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName);
  675. }
  676. free(binary);
  677. }
  678. else
  679. {
  680. b3Warning("Cannot open cached binary: %s\n", binaryFileName);
  681. }
  682. }
  683. if (!m_cpProgram)
  684. {
  685. cl_int localErrNum;
  686. char* compileFlags;
  687. int flagsize;
  688. const char* kernelSource = kernelSourceOrg;
  689. if (!kernelSourceOrg || gDebugForceLoadingFromSource)
  690. {
  691. if (clFileNameForCaching)
  692. {
  693. FILE* file = fopen(clFileNameForCaching, "rb");
  694. //in many cases the relative path is a few levels up the directory hierarchy, so try it
  695. if (!file)
  696. {
  697. const char* prefix[] = {"../", "../../", "../../../", "../../../../"};
  698. for (int i = 0; !file && i < 3; i++)
  699. {
  700. char relativeFileName[1024];
  701. sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching);
  702. file = fopen(relativeFileName, "rb");
  703. }
  704. }
  705. if (file)
  706. {
  707. char* kernelSrc = 0;
  708. fseek(file, 0L, SEEK_END);
  709. int kernelSize = ftell(file);
  710. rewind(file);
  711. kernelSrc = (char*)malloc(kernelSize + 1);
  712. int readBytes;
  713. readBytes = fread((void*)kernelSrc, 1, kernelSize, file);
  714. kernelSrc[kernelSize] = 0;
  715. fclose(file);
  716. kernelSource = kernelSrc;
  717. }
  718. }
  719. }
  720. size_t program_length = kernelSource ? strlen(kernelSource) : 0;
  721. #ifdef MAC //or __APPLE__?
  722. char* flags = "-cl-mad-enable -DMAC ";
  723. #else
  724. const char* flags = "";
  725. #endif
  726. m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum);
  727. if (localErrNum != CL_SUCCESS)
  728. {
  729. if (pErrNum)
  730. *pErrNum = localErrNum;
  731. return 0;
  732. }
  733. // Build the program with 'mad' Optimization option
  734. flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5);
  735. compileFlags = (char*)malloc(flagsize);
  736. #ifdef _MSC_VER
  737. sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros);
  738. #else
  739. sprintf(compileFlags, "%s %s", flags, additionalMacros);
  740. #endif
  741. localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL);
  742. if (localErrNum != CL_SUCCESS)
  743. {
  744. char* build_log;
  745. size_t ret_val_size;
  746. clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  747. build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1));
  748. clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  749. // to be carefully, terminate with \0
  750. // there's no information in the reference whether the string is 0 terminated or not
  751. build_log[ret_val_size] = '\0';
  752. b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log);
  753. free(build_log);
  754. if (pErrNum)
  755. *pErrNum = localErrNum;
  756. return 0;
  757. }
  758. if (!disableBinaryCaching && clFileNameForCaching)
  759. { // write to binary
  760. cl_uint numAssociatedDevices;
  761. status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0);
  762. b3Assert(status == CL_SUCCESS);
  763. if (numAssociatedDevices == 1)
  764. {
  765. size_t binarySize;
  766. char* binary;
  767. status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0);
  768. b3Assert(status == CL_SUCCESS);
  769. binary = (char*)malloc(sizeof(char) * binarySize);
  770. status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0);
  771. b3Assert(status == CL_SUCCESS);
  772. {
  773. FILE* file = 0;
  774. #ifdef _MSC_VER
  775. if (fopen_s(&file, binaryFileName, "wb") != 0)
  776. file = 0;
  777. #else
  778. file = fopen(binaryFileName, "wb");
  779. #endif
  780. if (file)
  781. {
  782. fwrite(binary, sizeof(char), binarySize, file);
  783. fclose(file);
  784. }
  785. else
  786. {
  787. b3Warning("cannot write file %s\n", binaryFileName);
  788. }
  789. }
  790. free(binary);
  791. }
  792. }
  793. free(compileFlags);
  794. }
  795. return m_cpProgram;
  796. }
  797. cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros)
  798. {
  799. cl_kernel kernel;
  800. cl_int localErrNum;
  801. cl_program m_cpProgram = prog;
  802. b3Printf("compiling kernel %s ", kernelName);
  803. if (!m_cpProgram)
  804. {
  805. m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false);
  806. }
  807. // Create the kernel
  808. kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum);
  809. if (localErrNum != CL_SUCCESS)
  810. {
  811. b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName);
  812. assert(0);
  813. if (pErrNum)
  814. *pErrNum = localErrNum;
  815. return 0;
  816. }
  817. if (!prog && m_cpProgram)
  818. {
  819. clReleaseProgram(m_cpProgram);
  820. }
  821. b3Printf("ready. \n");
  822. if (pErrNum)
  823. *pErrNum = CL_SUCCESS;
  824. return kernel;
  825. }