// b3GpuRigidBodyPipeline.cpp
/*
Copyright (c) 2013 Advanced Micro Devices, Inc.

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
//Originally written by Erwin Coumans
#include "b3GpuRigidBodyPipeline.h"
#include "b3GpuRigidBodyPipelineInternalData.h"
#include "kernels/integrateKernel.h"
#include "kernels/updateAabbsKernel.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "b3GpuNarrowPhase.h"
#include "Bullet3Geometry/b3AabbUtil.h"
#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h"
#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h"
#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h"

//#define TEST_OTHER_GPU_SOLVER

// Source paths passed to b3OpenCLUtils when compiling the embedded kernel
// strings (presumably used for kernel caching/debugging — see b3OpenCLUtils).
#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl"
#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl"

// Global runtime toggles, switchable by demos/benchmarks:
bool useBullet2CpuSolver = true;  // solve joints with the Bullet2-style solvers (GPU PGS or CPU) in stepSimulation

//choice of contact solver
bool gUseJacobi = false;                        // contacts: use the Jacobi solver (m_solver3) instead of PGS (m_solver2)
bool gUseDbvt = false;                          // broadphase: CPU dynamic BVH instead of the GPU sweep-and-prune
bool gDumpContactStats = false;                 // print contact/point counts every step
bool gCalcWorldSpaceAabbOnCpu = false;          // compute world-space AABBs on the host instead of the GPU kernel
bool gUseCalculateOverlappingPairsHost = false; // broadphase pair search on the host instead of the GPU
bool gIntegrateOnCpu = false;                   // integrate transforms on the host instead of the GPU kernel
bool gClearPairsOnGpu = true;                   // clear per-pair contact markers with a kernel instead of a host loop

// NOTE(review): this unconditionally re-enables what the commented-out
// #define above was meant to toggle, so the Jacobi solver path is always built.
#define TEST_OTHER_GPU_SOLVER 1
#ifdef TEST_OTHER_GPU_SOLVER
#include "b3GpuJacobiContactSolver.h"
#endif  //TEST_OTHER_GPU_SOLVER

#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h"
#include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h"
#include "b3GpuPgsContactSolver.h"
#include "b3Solver.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h"
#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h"
#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h"
#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
// Builds the GPU rigid-body pipeline.
// ctx/device/q: OpenCL context, device and command queue used for all GPU work.
// narrowphase, broadphaseSap, broadphaseDbvt: collaborators owned by the caller
//   (they are stored but never deleted by this class).
// config: capacity limits (max convex bodies, max broadphase pairs) used to
//   pre-size the GPU arrays.
// Compiles the integrate/update-AABB OpenCL kernels; failures trip b3Assert.
b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config)
{
	m_data = new b3GpuRigidBodyPipelineInternalData;
	m_data->m_constraintUid = 0;
	m_data->m_config = config;
	m_data->m_context = ctx;
	m_data->m_device = device;
	m_data->m_queue = q;

	// Joint solvers: CPU (Bullet2-style PGS/Jacobi) and its GPU counterpart.
	m_data->m_solver = new b3PgsJacobiSolver(true);
	m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true);

	// GPU-side arrays, pre-sized from the config limits.
	m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx, q, config.m_maxConvexBodies);
	m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx, q, config.m_maxBroadphasePairs);
	m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx, q);
#ifdef TEST_OTHER_GPU_SOLVER
	// Alternative Jacobi contact solver, selected at runtime via gUseJacobi.
	m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
#endif  // TEST_OTHER_GPU_SOLVER

	// Default PGS contact solver and GPU raycaster.
	m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs);
	m_data->m_raycaster = new b3GpuRaycast(ctx, device, q);

	m_data->m_broadphaseDbvt = broadphaseDbvt;
	m_data->m_broadphaseSap = broadphaseSap;
	m_data->m_narrowphase = narrowphase;
	m_data->m_gravity.setValue(0.f, -9.8f, 0.f);

	cl_int errNum = 0;
	{
		// Compile the transform-integration kernel; the program object can be
		// released once the kernel has been created from it.
		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH);
		b3Assert(errNum == CL_SUCCESS);
		m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog);
		b3Assert(errNum == CL_SUCCESS);
		clReleaseProgram(prog);
	}
	{
		// Compile the AABB-update program; two kernels are created from it.
		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH);
		b3Assert(errNum == CL_SUCCESS);
		m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog);
		b3Assert(errNum == CL_SUCCESS);
		m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog);
		b3Assert(errNum == CL_SUCCESS);
		clReleaseProgram(prog);
	}
}
  91. b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
  92. {
  93. if (m_data->m_integrateTransformsKernel)
  94. clReleaseKernel(m_data->m_integrateTransformsKernel);
  95. if (m_data->m_updateAabbsKernel)
  96. clReleaseKernel(m_data->m_updateAabbsKernel);
  97. if (m_data->m_clearOverlappingPairsKernel)
  98. clReleaseKernel(m_data->m_clearOverlappingPairsKernel);
  99. delete m_data->m_raycaster;
  100. delete m_data->m_solver;
  101. delete m_data->m_allAabbsGPU;
  102. delete m_data->m_gpuConstraints;
  103. delete m_data->m_overlappingPairsGPU;
  104. #ifdef TEST_OTHER_GPU_SOLVER
  105. delete m_data->m_solver3;
  106. #endif //TEST_OTHER_GPU_SOLVER
  107. delete m_data->m_solver2;
  108. delete m_data;
  109. }
  110. void b3GpuRigidBodyPipeline::reset()
  111. {
  112. m_data->m_gpuConstraints->resize(0);
  113. m_data->m_cpuConstraints.resize(0);
  114. m_data->m_allAabbsGPU->resize(0);
  115. m_data->m_allAabbsCPU.resize(0);
  116. }
  117. void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint)
  118. {
  119. m_data->m_joints.push_back(constraint);
  120. }
  121. void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint)
  122. {
  123. m_data->m_joints.remove(constraint);
  124. }
  125. void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid)
  126. {
  127. m_data->m_gpuSolver->recomputeBatches();
  128. //slow linear search
  129. m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
  130. //remove
  131. for (int i = 0; i < m_data->m_cpuConstraints.size(); i++)
  132. {
  133. if (m_data->m_cpuConstraints[i].m_uid == uid)
  134. {
  135. //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]);
  136. m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1);
  137. m_data->m_cpuConstraints.pop_back();
  138. break;
  139. }
  140. }
  141. if (m_data->m_cpuConstraints.size())
  142. {
  143. m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
  144. }
  145. else
  146. {
  147. m_data->m_gpuConstraints->resize(0);
  148. }
  149. }
  150. int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold)
  151. {
  152. m_data->m_gpuSolver->recomputeBatches();
  153. b3GpuGenericConstraint c;
  154. c.m_uid = m_data->m_constraintUid;
  155. m_data->m_constraintUid++;
  156. c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
  157. c.m_rbA = bodyA;
  158. c.m_rbB = bodyB;
  159. c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
  160. c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
  161. c.m_breakingImpulseThreshold = breakingThreshold;
  162. c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE;
  163. m_data->m_cpuConstraints.push_back(c);
  164. return c.m_uid;
  165. }
  166. int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold)
  167. {
  168. m_data->m_gpuSolver->recomputeBatches();
  169. b3GpuGenericConstraint c;
  170. c.m_uid = m_data->m_constraintUid;
  171. m_data->m_constraintUid++;
  172. c.m_flags = B3_CONSTRAINT_FLAG_ENABLED;
  173. c.m_rbA = bodyA;
  174. c.m_rbB = bodyB;
  175. c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]);
  176. c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]);
  177. c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]);
  178. c.m_breakingImpulseThreshold = breakingThreshold;
  179. c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE;
  180. m_data->m_cpuConstraints.push_back(c);
  181. return c.m_uid;
  182. }
  183. void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
  184. {
  185. //update worldspace AABBs from local AABB/worldtransform
  186. {
  187. B3_PROFILE("setupGpuAabbs");
  188. setupGpuAabbsFull();
  189. }
  190. int numPairs = 0;
  191. //compute overlapping pairs
  192. {
  193. if (gUseDbvt)
  194. {
  195. {
  196. B3_PROFILE("setAabb");
  197. m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU);
  198. for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++)
  199. {
  200. b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]);
  201. b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]);
  202. m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0);
  203. }
  204. }
  205. {
  206. B3_PROFILE("calculateOverlappingPairs");
  207. m_data->m_broadphaseDbvt->calculateOverlappingPairs();
  208. }
  209. numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs();
  210. }
  211. else
  212. {
  213. if (gUseCalculateOverlappingPairsHost)
  214. {
  215. m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs);
  216. }
  217. else
  218. {
  219. m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs);
  220. }
  221. numPairs = m_data->m_broadphaseSap->getNumOverlap();
  222. }
  223. }
  224. //compute contact points
  225. // printf("numPairs=%d\n",numPairs);
  226. int numContacts = 0;
  227. int numBodies = m_data->m_narrowphase->getNumRigidBodies();
  228. if (numPairs)
  229. {
  230. cl_mem pairs = 0;
  231. cl_mem aabbsWS = 0;
  232. if (gUseDbvt)
  233. {
  234. B3_PROFILE("m_overlappingPairsGPU->copyFromHost");
  235. m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
  236. pairs = m_data->m_overlappingPairsGPU->getBufferCL();
  237. aabbsWS = m_data->m_allAabbsGPU->getBufferCL();
  238. }
  239. else
  240. {
  241. pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer();
  242. aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS();
  243. }
  244. m_data->m_overlappingPairsGPU->resize(numPairs);
  245. //mark the contacts for each pair as 'unused'
  246. if (numPairs)
  247. {
  248. b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context, m_data->m_queue);
  249. gpuPairs.setFromOpenCLBuffer(pairs, numPairs);
  250. if (gClearPairsOnGpu)
  251. {
  252. //b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging
  253. //gpuPairs.copyToHost(hostPairs);
  254. b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel");
  255. launcher.setBuffer(pairs);
  256. launcher.setConst(numPairs);
  257. launcher.launch1D(numPairs);
  258. //gpuPairs.copyToHost(hostPairs);
  259. }
  260. else
  261. {
  262. b3AlignedObjectArray<b3BroadphasePair> hostPairs;
  263. gpuPairs.copyToHost(hostPairs);
  264. for (int i = 0; i < hostPairs.size(); i++)
  265. {
  266. hostPairs[i].z = 0xffffffff;
  267. }
  268. gpuPairs.copyFromHost(hostPairs);
  269. }
  270. }
  271. m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies);
  272. numContacts = m_data->m_narrowphase->getNumContactsGpu();
  273. if (gUseDbvt)
  274. {
  275. ///store the cached information (contact locations in the 'z' component)
  276. B3_PROFILE("m_overlappingPairsGPU->copyToHost");
  277. m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray());
  278. }
  279. if (gDumpContactStats && numContacts)
  280. {
  281. m_data->m_narrowphase->getContactsGpu();
  282. printf("numContacts = %d\n", numContacts);
  283. int totalPoints = 0;
  284. const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU();
  285. for (int i = 0; i < numContacts; i++)
  286. {
  287. totalPoints += contacts->getNPoints();
  288. }
  289. printf("totalPoints=%d\n", totalPoints);
  290. }
  291. }
  292. //convert contact points to contact constraints
  293. //solve constraints
  294. b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context, m_data->m_queue, 0, true);
  295. gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies());
  296. b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context, m_data->m_queue, 0, true);
  297. gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies());
  298. b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context, m_data->m_queue, 0, true);
  299. gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu());
  300. int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size();
  301. if (useBullet2CpuSolver && numJoints)
  302. {
  303. // b3AlignedObjectArray<b3Contact4> hostContacts;
  304. //gpuContacts.copyToHost(hostContacts);
  305. {
  306. bool useGpu = m_data->m_joints.size() == 0;
  307. // b3Contact4* contacts = numContacts? &hostContacts[0]: 0;
  308. //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints);
  309. if (useGpu)
  310. {
  311. m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints);
  312. }
  313. else
  314. {
  315. b3AlignedObjectArray<b3RigidBodyData> hostBodies;
  316. gpuBodies.copyToHost(hostBodies);
  317. b3AlignedObjectArray<b3InertiaData> hostInertias;
  318. gpuInertias.copyToHost(hostInertias);
  319. b3TypedConstraint** joints = numJoints ? &m_data->m_joints[0] : 0;
  320. m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints);
  321. gpuBodies.copyFromHost(hostBodies);
  322. }
  323. }
  324. }
  325. if (numContacts)
  326. {
  327. #ifdef TEST_OTHER_GPU_SOLVER
  328. if (gUseJacobi)
  329. {
  330. bool useGpu = true;
  331. if (useGpu)
  332. {
  333. bool forceHost = false;
  334. if (forceHost)
  335. {
  336. b3AlignedObjectArray<b3RigidBodyData> hostBodies;
  337. b3AlignedObjectArray<b3InertiaData> hostInertias;
  338. b3AlignedObjectArray<b3Contact4> hostContacts;
  339. {
  340. B3_PROFILE("copyToHost");
  341. gpuBodies.copyToHost(hostBodies);
  342. gpuInertias.copyToHost(hostInertias);
  343. gpuContacts.copyToHost(hostContacts);
  344. }
  345. {
  346. b3JacobiSolverInfo solverInfo;
  347. m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo);
  348. }
  349. {
  350. B3_PROFILE("copyFromHost");
  351. gpuBodies.copyFromHost(hostBodies);
  352. }
  353. }
  354. else
  355. {
  356. int static0Index = m_data->m_narrowphase->getStatic0Index();
  357. b3JacobiSolverInfo solverInfo;
  358. //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo);
  359. //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
  360. m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
  361. }
  362. }
  363. else
  364. {
  365. b3AlignedObjectArray<b3RigidBodyData> hostBodies;
  366. gpuBodies.copyToHost(hostBodies);
  367. b3AlignedObjectArray<b3InertiaData> hostInertias;
  368. gpuInertias.copyToHost(hostInertias);
  369. b3AlignedObjectArray<b3Contact4> hostContacts;
  370. gpuContacts.copyToHost(hostContacts);
  371. {
  372. //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
  373. }
  374. gpuBodies.copyFromHost(hostBodies);
  375. }
  376. }
  377. else
  378. #endif //TEST_OTHER_GPU_SOLVER
  379. {
  380. int static0Index = m_data->m_narrowphase->getStatic0Index();
  381. m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index);
  382. //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
  383. /*m_data->m_solver3->solveContactConstraintHost(
  384. (b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies,
  385. (b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias,
  386. (b3OpenCLArray<Constraint4>*) &gpuContacts,
  387. 0,numContacts,256);
  388. */
  389. }
  390. }
  391. integrate(deltaTime);
  392. }
  393. void b3GpuRigidBodyPipeline::integrate(float timeStep)
  394. {
  395. //integrate
  396. int numBodies = m_data->m_narrowphase->getNumRigidBodies();
  397. float angularDamp = 0.99f;
  398. if (gIntegrateOnCpu)
  399. {
  400. if (numBodies)
  401. {
  402. b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData();
  403. npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU);
  404. b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0);
  405. for (int nodeID = 0; nodeID < numBodies; nodeID++)
  406. {
  407. integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity);
  408. }
  409. npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU);
  410. }
  411. }
  412. else
  413. {
  414. b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel");
  415. launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu());
  416. launcher.setConst(numBodies);
  417. launcher.setConst(timeStep);
  418. launcher.setConst(angularDamp);
  419. launcher.setConst(m_data->m_gravity);
  420. launcher.launch1D(numBodies);
  421. }
  422. }
// Recomputes every body's world-space AABB from its local-space AABB and
// current world transform. The result is written to the buffer consumed by
// the active broadphase: m_allAabbsGPU when using the DBVT broadphase,
// otherwise the SAP broadphase's own world-space AABB buffer.
// No-op when there are no rigid bodies.
void b3GpuRigidBodyPipeline::setupGpuAabbsFull()
{
	cl_int ciErrNum = 0;
	int numBodies = m_data->m_narrowphase->getNumRigidBodies();
	if (!numBodies)
		return;
	if (gCalcWorldSpaceAabbOnCpu)
	{
		// CPU path: read all bodies back and transform each AABB on the host.
		if (numBodies)
		{
			if (gUseDbvt)
			{
				m_data->m_allAabbsCPU.resize(numBodies);
				m_data->m_narrowphase->readbackAllBodiesToCpu();
				for (int i = 0; i < numBodies; i++)
				{
					b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]);
				}
				m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
			}
			else
			{
				// SAP broadphase owns its own host/device AABB arrays.
				m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies);
				m_data->m_narrowphase->readbackAllBodiesToCpu();
				for (int i = 0; i < numBodies; i++)
				{
					b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]);
				}
				m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU());
				//m_data->m_broadphaseSap->writeAabbsToGpu();
			}
		}
	}
	else
	{
		// GPU path: one work item per body. Kernel signature for reference:
		//__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB)
		b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel");
		launcher.setConst(numBodies);
		cl_mem bodies = m_data->m_narrowphase->getBodiesGpu();
		launcher.setBuffer(bodies);
		cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu();
		launcher.setBuffer(collidables);
		cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu();
		launcher.setBuffer(localAabbs);
		cl_mem worldAabbs = 0;
		if (gUseDbvt)
		{
			worldAabbs = m_data->m_allAabbsGPU->getBufferCL();
		}
		else
		{
			worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS();
		}
		launcher.setBuffer(worldAabbs);
		launcher.launch1D(numBodies);
		// NOTE(review): ciErrNum is never assigned after initialization, so
		// this check always passes — effectively a no-op.
		oclCHECKERROR(ciErrNum, CL_SUCCESS);
	}
	/* debug dump, kept for reference:
	b3AlignedObjectArray<b3SapAabb> aabbs;
	m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs);
	printf("numAabbs = %d\n", aabbs.size());
	for (int i=0;i<aabbs.size();i++)
	{
		printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]);
		printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]);
	};
	*/
}
  491. cl_mem b3GpuRigidBodyPipeline::getBodyBuffer()
  492. {
  493. return m_data->m_narrowphase->getBodiesGpu();
  494. }
  495. int b3GpuRigidBodyPipeline::getNumBodies() const
  496. {
  497. return m_data->m_narrowphase->getNumRigidBodies();
  498. }
  499. void b3GpuRigidBodyPipeline::setGravity(const float* grav)
  500. {
  501. m_data->m_gravity.setValue(grav[0], grav[1], grav[2]);
  502. }
  503. void b3GpuRigidBodyPipeline::copyConstraintsToHost()
  504. {
  505. m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints);
  506. }
  507. void b3GpuRigidBodyPipeline::writeAllInstancesToGpu()
  508. {
  509. m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
  510. m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints);
  511. }
  512. int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu)
  513. {
  514. b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0);
  515. if (collidableIndex >= 0)
  516. {
  517. b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
  518. b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]);
  519. b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]);
  520. b3Scalar margin = 0.01f;
  521. b3Transform t;
  522. t.setIdentity();
  523. t.setOrigin(b3MakeVector3(position[0], position[1], position[2]));
  524. t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3]));
  525. b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax);
  526. }
  527. else
  528. {
  529. b3Error("registerPhysicsInstance using invalid collidableIndex\n");
  530. return -1;
  531. }
  532. bool writeToGpu = false;
  533. int bodyIndex = m_data->m_narrowphase->getNumRigidBodies();
  534. bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu);
  535. if (bodyIndex >= 0)
  536. {
  537. if (gUseDbvt)
  538. {
  539. m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1);
  540. b3SapAabb aabb;
  541. for (int i = 0; i < 3; i++)
  542. {
  543. aabb.m_min[i] = aabbMin[i];
  544. aabb.m_max[i] = aabbMax[i];
  545. aabb.m_minIndices[3] = bodyIndex;
  546. }
  547. m_data->m_allAabbsCPU.push_back(aabb);
  548. if (writeInstanceToGpu)
  549. {
  550. m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU);
  551. }
  552. }
  553. else
  554. {
  555. if (mass)
  556. {
  557. m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher);
  558. }
  559. else
  560. {
  561. m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher);
  562. }
  563. }
  564. }
  565. /*
  566. if (mass>0.f)
  567. m_numDynamicPhysicsInstances++;
  568. m_numPhysicsInstances++;
  569. */
  570. return bodyIndex;
  571. }
  572. void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults)
  573. {
  574. this->m_data->m_raycaster->castRays(rays, hitResults,
  575. getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(),
  576. m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(),
  577. m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap);
  578. }