12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- #ifndef B3_RADIXSORT32_H
- #define B3_RADIXSORT32_H
- #include "b3OpenCLArray.h"
- struct b3SortData
- {
- union {
- unsigned int m_key;
- unsigned int x;
- };
- union {
- unsigned int m_value;
- unsigned int y;
- };
- };
- #include "b3BufferInfoCL.h"
- class b3RadixSort32CL
- {
- b3OpenCLArray<unsigned int>* m_workBuffer1;
- b3OpenCLArray<unsigned int>* m_workBuffer2;
- b3OpenCLArray<b3SortData>* m_workBuffer3;
- b3OpenCLArray<b3SortData>* m_workBuffer4;
- b3OpenCLArray<unsigned int>* m_workBuffer3a;
- b3OpenCLArray<unsigned int>* m_workBuffer4a;
- cl_command_queue m_commandQueue;
- cl_kernel m_streamCountSortDataKernel;
- cl_kernel m_streamCountKernel;
- cl_kernel m_prefixScanKernel;
- cl_kernel m_sortAndScatterSortDataKernel;
- cl_kernel m_sortAndScatterKernel;
- bool m_deviceCPU;
- class b3PrefixScanCL* m_scan;
- class b3FillCL* m_fill;
- public:
- struct b3ConstData
- {
- int m_n;
- int m_nWGs;
- int m_startBit;
- int m_nBlocksPerWG;
- };
- enum
- {
- DATA_ALIGNMENT = 256,
- WG_SIZE = 64,
- BLOCK_SIZE = 256,
- ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE),
- BITS_PER_PASS = 4,
- NUM_BUCKET = (1 << BITS_PER_PASS),
- // if you change this, change nPerWI in kernel as well
- NUM_WGS = 20 * 6, // cypress
- // NUM_WGS = 24*6, // cayman
- // NUM_WGS = 32*4, // nv
- };
- private:
- public:
- b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0);
- virtual ~b3RadixSort32CL();
- void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
- b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
- ///keys only
- void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32);
- void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
- void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
- void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
- };
- #endif //B3_RADIXSORT32_H
|