// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2022 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------

/**
 * @brief Functions and data declarations for the outer context.
 *
 * The outer context includes thread-pool management, which is slower to
 * compile due to increased use of C++ stdlib. The inner context used in the
 * majority of the codec library does not include this.
 */

#ifndef ASTCENC_INTERNAL_ENTRY_INCLUDED
#define ASTCENC_INTERNAL_ENTRY_INCLUDED

#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>

#include "astcenc_internal.h"

/* ============================================================================
  Parallel execution control
============================================================================ */
/**
 * @brief A simple counter-based manager for parallel task execution.
 *
 * The task processing flow consists of:
 *
 *     * A single-threaded init stage.
 *     * A multi-threaded processing stage.
 *     * A condition variable so threads can wait for processing completion.
 *
 * The init stage will be executed by the first thread to arrive in the critical section; there is
 * no main thread in the thread pool.
 *
 * The processing stage uses dynamic dispatch to assign task tickets to threads on an on-demand
 * basis. Threads may therefore each execute a different number of tasks, depending on their
 * processing complexity. The task queue and the task tickets are just counters; the caller must map
 * these integers to an actual processing partition in a specific problem domain.
 *
 * The exit wait condition is needed to ensure processing has finished before a worker thread can
 * progress to the next stage of the pipeline. Specifically, a worker may exit the processing stage
 * because there are no new tasks to assign to it while other worker threads are still processing.
 * Calling @c wait() will ensure that all other workers have finished before the thread can proceed.
 *
 * The basic usage model is outlined below; an illustrative sketch of the same flow follows the
 * class definition.
 *
 *     // --------- From single-threaded code ---------
 *
 *     // Reset the tracker state
 *     manager->reset();
 *
 *     // --------- From multi-threaded code ---------
 *
 *     // Run the stage init; only the first thread actually runs the lambda
 *     manager->init(<lambda>);
 *
 *     do
 *     {
 *         // Request a task assignment
 *         unsigned int task_count;
 *         unsigned int base_index = manager->get_task_assignment(<granule>, task_count);
 *
 *         // Process any tasks we were given (task_count <= granule size)
 *         if (task_count)
 *         {
 *             // Run the user task processing code for N tasks here
 *             ...
 *
 *             // Flag these tasks as complete
 *             manager->complete_task_assignment(task_count);
 *         }
 *     } while (task_count);
 *
 *     // Wait for all threads to complete tasks before progressing
 *     manager->wait();
 *
 *     // Run the stage term; only the first thread actually runs the lambda
 *     manager->term(<lambda>);
 */
class ParallelManager
{
private:
	/** @brief Lock used for critical section and condition synchronization. */
	std::mutex m_lock;

	/** @brief True if the stage init() step has been executed. */
	bool m_init_done;

	/** @brief True if the stage term() step has been executed. */
	bool m_term_done;

	/** @brief Condition variable for tracking stage processing completion. */
	std::condition_variable m_complete;

	/** @brief Number of tasks started, but not necessarily finished. */
	std::atomic<unsigned int> m_start_count;

	/** @brief Number of tasks finished. */
	unsigned int m_done_count;

	/** @brief Number of tasks that need to be processed. */
	unsigned int m_task_count;

public:
	/** @brief Create a new ParallelManager. */
	ParallelManager()
	{
		reset();
	}

	/**
	 * @brief Reset the tracker for a new processing batch.
	 *
	 * This must be called from single-threaded code before starting the multi-threaded processing
	 * operations.
	 */
	void reset()
	{
		m_init_done = false;
		m_term_done = false;
		m_start_count = 0;
		m_done_count = 0;
		m_task_count = 0;
	}

	/**
	 * @brief Trigger the pipeline stage init step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * initialization. Other threads will block and wait for it to complete.
	 *
	 * @param init_func   Callable which executes the stage initialization. It must return the
	 *                    total number of tasks in the stage.
	 */
	void init(std::function<unsigned int(void)> init_func)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_init_done)
		{
			m_task_count = init_func();
			m_init_done = true;
		}
	}

	/**
	 * @brief Trigger the pipeline stage init step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * initialization. Other threads will block and wait for it to complete.
	 *
	 * @param task_count   Total number of tasks needing processing.
	 */
	void init(unsigned int task_count)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_init_done)
		{
			m_task_count = task_count;
			m_init_done = true;
		}
	}

	/**
	 * @brief Request a task assignment.
	 *
	 * Assign up to @c granule tasks to the caller for processing.
	 *
	 * @param      granule   Maximum number of tasks that can be assigned.
	 * @param[out] count     Actual number of tasks assigned, or zero if no tasks were assigned.
	 *
	 * @return Task index of the first assigned task; assigned tasks increment from this.
	 */
	unsigned int get_task_assignment(unsigned int granule, unsigned int& count)
	{
		unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed);
		if (base >= m_task_count)
		{
			count = 0;
			return 0;
		}

		count = astc::min(m_task_count - base, granule);
		return base;
	}
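
	// Worked example of the ticketing arithmetic above, for illustration only:
	// with m_task_count = 10 and granule = 4, three competing callers observe
	// base values 0, 4, and 8 from the fetch_add(), and are handed counts of
	// 4, 4, and 2 respectively. Any later caller observes a base >= 10 and is
	// handed a count of 0, which terminates its processing loop.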

	/**
	 * @brief Complete a task assignment.
	 *
	 * Mark @c count tasks as complete. This will notify all threads blocked on @c wait() if this
	 * completes the processing of the stage.
	 *
	 * @param count   The number of completed tasks.
	 */
	void complete_task_assignment(unsigned int count)
	{
		// Note: m_done_count cannot be a plain atomic used without the mutex; that would race
		// between the update here and the predicate check in wait() on other threads
		std::unique_lock<std::mutex> lck(m_lock);
		this->m_done_count += count;
		if (m_done_count == m_task_count)
		{
			lck.unlock();
			m_complete.notify_all();
		}
	}

	/**
	 * @brief Wait for stage processing to complete.
	 */
	void wait()
	{
		std::unique_lock<std::mutex> lck(m_lock);
		m_complete.wait(lck, [this]{ return m_done_count == m_task_count; });
	}

	/**
	 * @brief Trigger the pipeline stage term step.
	 *
	 * This can be called from multi-threaded code. The first thread to hit this will process the
	 * work pool termination. Caller must have called @c wait() prior to calling this function to
	 * ensure that processing is complete.
	 *
	 * @param term_func   Callable which executes the stage termination.
	 */
	void term(std::function<void(void)> term_func)
	{
		std::lock_guard<std::mutex> lck(m_lock);
		if (!m_term_done)
		{
			term_func();
			m_term_done = true;
		}
	}
};
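
#if 0
// Illustrative sketch only, not part of the library: one worker thread driving
// a ParallelManager through a single pipeline stage, following the usage model
// documented above. The task total of 1024, the granule size of 16, and the
// per-task lambda are placeholder assumptions for the example.
static void example_worker(ParallelManager& manager)
{
	// Placeholder per-task work; a real caller maps each ticket index onto its
	// own problem domain (e.g. a row of blocks to compress)
	auto process_one_task = [](unsigned int /* task_index */) {};

	// Run the stage init; only the first thread to arrive executes the lambda
	manager.init([]() -> unsigned int { return 1024; });

	unsigned int task_count = 0;
	do
	{
		// Request up to 16 task tickets for this thread
		unsigned int base_index = manager.get_task_assignment(16, task_count);

		// Process and then retire any tickets we were handed
		if (task_count)
		{
			for (unsigned int i = 0; i < task_count; i++)
			{
				process_one_task(base_index + i);
			}

			manager.complete_task_assignment(task_count);
		}
	} while (task_count);

	// Block until every other worker has also retired its tickets
	manager.wait();

	// Run the stage term; only the first thread to arrive executes the lambda
	manager.term([]() {});
}
#endif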

/**
 * @brief The astcenc compression context.
 */
struct astcenc_context
{
	/** @brief The context internal state. */
	astcenc_contexti context;

#if !defined(ASTCENC_DECOMPRESS_ONLY)
	/** @brief The parallel manager for averages computation. */
	ParallelManager manage_avg;

	/** @brief The parallel manager for compression. */
	ParallelManager manage_compress;
#endif

	/** @brief The parallel manager for decompression. */
	ParallelManager manage_decompress;
};
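
#if 0
// Illustrative sketch only, not part of the library: the parallel managers
// above are exercised when a caller spawns several threads that each invoke
// the public astcenc entry points on the shared context with a distinct
// thread index. The image, swizzle, and output buffer set-up are assumed to
// have been done elsewhere by the caller.
#include <thread>
#include <vector>

static void example_threaded_compress(
	astcenc_context* context,
	astcenc_image* image,
	const astcenc_swizzle* swizzle,
	uint8_t* data_out,
	size_t data_len,
	unsigned int thread_count
) {
	std::vector<std::thread> workers;
	for (unsigned int i = 0; i < thread_count; i++)
	{
		// Every worker shares the same context; the ParallelManager members
		// hand out task tickets so the threads cooperate on one image
		workers.emplace_back([=]() {
			astcenc_compress_image(context, image, swizzle, data_out, data_len, i);
		});
	}

	for (auto& worker : workers)
	{
		worker.join();
	}
}
#endif
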
#endif