image_compress_betsy.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. /**************************************************************************/
  2. /* image_compress_betsy.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "image_compress_betsy.h"
  31. #include "core/config/project_settings.h"
  32. #include "betsy_bc1.h"
  33. #include "bc1.glsl.gen.h"
  34. #include "bc4.glsl.gen.h"
  35. #include "bc6h.glsl.gen.h"
  36. #include "servers/display_server.h"
  37. static Mutex betsy_mutex;
  38. static BetsyCompressor *betsy = nullptr;
  39. void BetsyCompressor::_init() {
  40. if (!DisplayServer::can_create_rendering_device()) {
  41. return;
  42. }
  43. // Create local RD.
  44. RenderingContextDriver *rcd = nullptr;
  45. RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
  46. if (rd == nullptr) {
  47. #if defined(RD_ENABLED)
  48. #if defined(METAL_ENABLED)
  49. rcd = memnew(RenderingContextDriverMetal);
  50. rd = memnew(RenderingDevice);
  51. #endif
  52. #if defined(VULKAN_ENABLED)
  53. if (rcd == nullptr) {
  54. rcd = memnew(RenderingContextDriverVulkan);
  55. rd = memnew(RenderingDevice);
  56. }
  57. #endif
  58. #endif
  59. if (rcd != nullptr && rd != nullptr) {
  60. Error err = rcd->initialize();
  61. if (err == OK) {
  62. err = rd->initialize(rcd);
  63. }
  64. if (err != OK) {
  65. memdelete(rd);
  66. memdelete(rcd);
  67. rd = nullptr;
  68. rcd = nullptr;
  69. }
  70. }
  71. }
  72. ERR_FAIL_NULL_MSG(rd, "Unable to create a local RenderingDevice.");
  73. compress_rd = rd;
  74. compress_rcd = rcd;
  75. // Create the sampler state.
  76. RD::SamplerState src_sampler_state;
  77. {
  78. src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  79. src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
  80. src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
  81. src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
  82. src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
  83. }
  84. src_sampler = compress_rd->sampler_create(src_sampler_state);
  85. }
  86. void BetsyCompressor::init() {
  87. WorkerThreadPool::TaskID tid = WorkerThreadPool::get_singleton()->add_task(callable_mp(this, &BetsyCompressor::_thread_loop), true);
  88. command_queue.set_pump_task_id(tid);
  89. command_queue.push(this, &BetsyCompressor::_assign_mt_ids, tid);
  90. command_queue.push_and_sync(this, &BetsyCompressor::_init);
  91. DEV_ASSERT(task_id == tid);
  92. }
  93. void BetsyCompressor::_assign_mt_ids(WorkerThreadPool::TaskID p_pump_task_id) {
  94. task_id = p_pump_task_id;
  95. }
  96. // Yield thread to WTP so other tasks can be done on it.
  97. // Automatically regains control as soon a task is pushed to the command queue.
  98. void BetsyCompressor::_thread_loop() {
  99. while (!exit) {
  100. WorkerThreadPool::get_singleton()->yield();
  101. command_queue.flush_all();
  102. }
  103. }
  104. void BetsyCompressor::_thread_exit() {
  105. exit = true;
  106. if (compress_rd != nullptr) {
  107. if (dxt1_encoding_table_buffer.is_valid()) {
  108. compress_rd->free(dxt1_encoding_table_buffer);
  109. }
  110. compress_rd->free(src_sampler);
  111. // Clear the shader cache, pipelines will be unreferenced automatically.
  112. for (KeyValue<String, BetsyShader> &E : cached_shaders) {
  113. if (E.value.compiled.is_valid()) {
  114. compress_rd->free(E.value.compiled);
  115. }
  116. }
  117. cached_shaders.clear();
  118. }
  119. }
  120. void BetsyCompressor::finish() {
  121. command_queue.push(this, &BetsyCompressor::_thread_exit);
  122. if (task_id != WorkerThreadPool::INVALID_TASK_ID) {
  123. WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
  124. task_id = WorkerThreadPool::INVALID_TASK_ID;
  125. }
  126. if (compress_rd != nullptr) {
  127. // Free the RD (and RCD if necessary).
  128. memdelete(compress_rd);
  129. compress_rd = nullptr;
  130. if (compress_rcd != nullptr) {
  131. memdelete(compress_rcd);
  132. compress_rcd = nullptr;
  133. }
  134. }
  135. }
  136. // Helper functions.
  // Rounds n up to the nearest multiple of m; a value that is already a
  // multiple is returned unchanged. Callers divide the result by m to get a
  // workgroup count, so bumping exact multiples to n + m would dispatch an
  // extra row/column of workgroups that lies entirely outside the image.
  // Expects n >= 0 and m > 0.
  static int get_next_multiple(int n, int m) {
      const int remainder = n % m;
      if (remainder == 0) {
          return n;
      }
      return n + (m - remainder);
  }
  140. static String get_shader_name(BetsyFormat p_format) {
  141. switch (p_format) {
  142. case BETSY_FORMAT_BC1:
  143. case BETSY_FORMAT_BC1_DITHER:
  144. return "BC1";
  145. case BETSY_FORMAT_BC3:
  146. return "BC3";
  147. case BETSY_FORMAT_BC4_SIGNED:
  148. case BETSY_FORMAT_BC4_UNSIGNED:
  149. return "BC4";
  150. case BETSY_FORMAT_BC6_SIGNED:
  151. case BETSY_FORMAT_BC6_UNSIGNED:
  152. return "BC6";
  153. default:
  154. return "";
  155. }
  156. }
  157. Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
  158. uint64_t start_time = OS::get_singleton()->get_ticks_msec();
  159. // Return an error so that the compression can fall back to cpu compression
  160. if (compress_rd == nullptr) {
  161. return ERR_CANT_CREATE;
  162. }
  163. if (r_img->is_compressed()) {
  164. return ERR_INVALID_DATA;
  165. }
  166. Error err = OK;
  167. // Destination format.
  168. Image::Format dest_format = Image::FORMAT_MAX;
  169. RD::DataFormat dst_rd_format = RD::DATA_FORMAT_MAX;
  170. String version = "";
  171. switch (p_format) {
  172. case BETSY_FORMAT_BC1:
  173. version = "standard";
  174. dst_rd_format = RD::DATA_FORMAT_R32G32_UINT;
  175. dest_format = Image::FORMAT_DXT1;
  176. break;
  177. case BETSY_FORMAT_BC1_DITHER:
  178. version = "dithered";
  179. dst_rd_format = RD::DATA_FORMAT_R32G32_UINT;
  180. dest_format = Image::FORMAT_DXT1;
  181. break;
  182. case BETSY_FORMAT_BC4_UNSIGNED:
  183. version = "unsigned";
  184. dst_rd_format = RD::DATA_FORMAT_R32G32_UINT;
  185. dest_format = Image::FORMAT_RGTC_R;
  186. break;
  187. case BETSY_FORMAT_BC6_SIGNED:
  188. version = "signed";
  189. dst_rd_format = RD::DATA_FORMAT_R32G32B32A32_UINT;
  190. dest_format = Image::FORMAT_BPTC_RGBF;
  191. break;
  192. case BETSY_FORMAT_BC6_UNSIGNED:
  193. version = "unsigned";
  194. dst_rd_format = RD::DATA_FORMAT_R32G32B32A32_UINT;
  195. dest_format = Image::FORMAT_BPTC_RGBFU;
  196. break;
  197. default:
  198. err = ERR_INVALID_PARAMETER;
  199. break;
  200. }
  201. const String shader_name = get_shader_name(p_format) + "-" + version;
  202. BetsyShader shader;
  203. if (cached_shaders.has(shader_name)) {
  204. shader = cached_shaders[shader_name];
  205. } else {
  206. Ref<RDShaderFile> source;
  207. source.instantiate();
  208. switch (p_format) {
  209. case BETSY_FORMAT_BC1:
  210. case BETSY_FORMAT_BC1_DITHER:
  211. err = source->parse_versions_from_text(bc1_shader_glsl);
  212. break;
  213. case BETSY_FORMAT_BC4_SIGNED:
  214. case BETSY_FORMAT_BC4_UNSIGNED:
  215. err = source->parse_versions_from_text(bc4_shader_glsl);
  216. break;
  217. case BETSY_FORMAT_BC6_SIGNED:
  218. case BETSY_FORMAT_BC6_UNSIGNED:
  219. err = source->parse_versions_from_text(bc6h_shader_glsl);
  220. break;
  221. default:
  222. err = ERR_INVALID_PARAMETER;
  223. break;
  224. }
  225. if (err != OK) {
  226. source->print_errors("Betsy compress shader");
  227. return err;
  228. }
  229. // Compile the shader, return early if invalid.
  230. shader.compiled = compress_rd->shader_create_from_spirv(source->get_spirv_stages(version));
  231. if (shader.compiled.is_null()) {
  232. return ERR_CANT_CREATE;
  233. }
  234. // Compile the pipeline, return early if invalid.
  235. shader.pipeline = compress_rd->compute_pipeline_create(shader.compiled);
  236. if (shader.pipeline.is_null()) {
  237. return ERR_CANT_CREATE;
  238. }
  239. cached_shaders[shader_name] = shader;
  240. }
  241. if (shader.compiled.is_null() || shader.pipeline.is_null()) {
  242. return ERR_INVALID_DATA;
  243. }
  244. // src_texture format information.
  245. RD::TextureFormat src_texture_format;
  246. {
  247. src_texture_format.array_layers = 1;
  248. src_texture_format.depth = 1;
  249. src_texture_format.mipmaps = 1;
  250. src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
  251. src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
  252. }
  253. switch (r_img->get_format()) {
  254. case Image::FORMAT_L8:
  255. r_img->convert(Image::FORMAT_RGBA8);
  256. src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  257. break;
  258. case Image::FORMAT_LA8:
  259. r_img->convert(Image::FORMAT_RGBA8);
  260. src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  261. break;
  262. case Image::FORMAT_R8:
  263. src_texture_format.format = RD::DATA_FORMAT_R8_UNORM;
  264. break;
  265. case Image::FORMAT_RG8:
  266. src_texture_format.format = RD::DATA_FORMAT_R8G8_UNORM;
  267. break;
  268. case Image::FORMAT_RGB8:
  269. r_img->convert(Image::FORMAT_RGBA8);
  270. src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  271. break;
  272. case Image::FORMAT_RGBA8:
  273. src_texture_format.format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
  274. break;
  275. case Image::FORMAT_RH:
  276. src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT;
  277. break;
  278. case Image::FORMAT_RGH:
  279. src_texture_format.format = RD::DATA_FORMAT_R16G16_SFLOAT;
  280. break;
  281. case Image::FORMAT_RGBH:
  282. r_img->convert(Image::FORMAT_RGBAH);
  283. src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  284. break;
  285. case Image::FORMAT_RGBAH:
  286. src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
  287. break;
  288. case Image::FORMAT_RF:
  289. src_texture_format.format = RD::DATA_FORMAT_R32_SFLOAT;
  290. break;
  291. case Image::FORMAT_RGF:
  292. src_texture_format.format = RD::DATA_FORMAT_R32G32_SFLOAT;
  293. break;
  294. case Image::FORMAT_RGBF:
  295. r_img->convert(Image::FORMAT_RGBAF);
  296. src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  297. break;
  298. case Image::FORMAT_RGBAF:
  299. src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
  300. break;
  301. case Image::FORMAT_RGBE9995:
  302. src_texture_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
  303. break;
  304. default: {
  305. return err;
  306. }
  307. }
  308. // For the destination format just copy the source format and change the usage bits.
  309. RD::TextureFormat dst_texture_format = src_texture_format;
  310. dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
  311. dst_texture_format.format = dst_rd_format;
  312. // Encoding table setup.
  313. if (dest_format == Image::FORMAT_DXT1 && dxt1_encoding_table_buffer.is_null()) {
  314. Vector<uint8_t> data;
  315. data.resize(1024 * 4);
  316. memcpy(data.ptrw(), dxt1_encoding_table, 1024 * 4);
  317. dxt1_encoding_table_buffer = compress_rd->storage_buffer_create(1024 * 4, data);
  318. }
  319. const int mip_count = r_img->get_mipmap_count() + 1;
  320. // Container for the compressed data.
  321. Vector<uint8_t> dst_data;
  322. dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps()));
  323. uint8_t *dst_data_ptr = dst_data.ptrw();
  324. Vector<Vector<uint8_t>> src_images;
  325. src_images.push_back(Vector<uint8_t>());
  326. Vector<uint8_t> *src_image_ptr = src_images.ptrw();
  327. // Compress each mipmap.
  328. for (int i = 0; i < mip_count; i++) {
  329. int64_t ofs, size;
  330. int width, height;
  331. r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height);
  332. // Set the source texture width and size.
  333. src_texture_format.height = height;
  334. src_texture_format.width = width;
  335. // Set the destination texture width and size.
  336. dst_texture_format.height = (height + 3) >> 2;
  337. dst_texture_format.width = (width + 3) >> 2;
  338. // Create a buffer filled with the source mip layer data.
  339. src_image_ptr[0].resize(size);
  340. memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size);
  341. // Create the textures on the GPU.
  342. RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
  343. RID dst_texture = compress_rd->texture_create(dst_texture_format, RD::TextureView());
  344. Vector<RD::Uniform> uniforms;
  345. {
  346. {
  347. RD::Uniform u;
  348. u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
  349. u.binding = 0;
  350. u.append_id(src_sampler);
  351. u.append_id(src_texture);
  352. uniforms.push_back(u);
  353. }
  354. {
  355. RD::Uniform u;
  356. u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
  357. u.binding = 1;
  358. u.append_id(dst_texture);
  359. uniforms.push_back(u);
  360. }
  361. if (dest_format == Image::FORMAT_DXT1) {
  362. RD::Uniform u;
  363. u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
  364. u.binding = 2;
  365. u.append_id(dxt1_encoding_table_buffer);
  366. uniforms.push_back(u);
  367. }
  368. }
  369. RID uniform_set = compress_rd->uniform_set_create(uniforms, shader.compiled, 0);
  370. RD::ComputeListID compute_list = compress_rd->compute_list_begin();
  371. compress_rd->compute_list_bind_compute_pipeline(compute_list, shader.pipeline);
  372. compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
  373. switch (dest_format) {
  374. case Image::FORMAT_BPTC_RGBFU:
  375. case Image::FORMAT_BPTC_RGBF: {
  376. BC6PushConstant push_constant;
  377. push_constant.sizeX = 1.0f / width;
  378. push_constant.sizeY = 1.0f / height;
  379. push_constant.padding[0] = 0;
  380. push_constant.padding[1] = 0;
  381. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
  382. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  383. } break;
  384. case Image::FORMAT_DXT1: {
  385. BC1PushConstant push_constant;
  386. push_constant.num_refines = 2;
  387. push_constant.padding[0] = 0;
  388. push_constant.padding[1] = 0;
  389. push_constant.padding[2] = 0;
  390. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC1PushConstant));
  391. compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
  392. } break;
  393. case Image::FORMAT_RGTC_R: {
  394. BC4PushConstant push_constant;
  395. push_constant.channel_idx = 0;
  396. push_constant.padding[0] = 0;
  397. push_constant.padding[1] = 0;
  398. push_constant.padding[2] = 0;
  399. compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC4PushConstant));
  400. compress_rd->compute_list_dispatch(compute_list, 1, get_next_multiple(width, 16) / 16, get_next_multiple(height, 16) / 16);
  401. } break;
  402. default: {
  403. } break;
  404. }
  405. compress_rd->compute_list_end();
  406. compress_rd->submit();
  407. compress_rd->sync();
  408. // Copy data from the GPU to the buffer.
  409. const Vector<uint8_t> texture_data = compress_rd->texture_get_data(dst_texture, 0);
  410. int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i);
  411. memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
  412. // Free the source and dest texture.
  413. compress_rd->free(dst_texture);
  414. compress_rd->free(src_texture);
  415. }
  416. src_images.clear();
  417. // Set the compressed data to the image.
  418. r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data);
  419. print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
  420. return OK;
  421. }
  422. void ensure_betsy_exists() {
  423. betsy_mutex.lock();
  424. if (betsy == nullptr) {
  425. betsy = memnew(BetsyCompressor);
  426. betsy->init();
  427. }
  428. betsy_mutex.unlock();
  429. }
  430. Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
  431. ensure_betsy_exists();
  432. Image::Format format = r_img->get_format();
  433. Error result = ERR_UNAVAILABLE;
  434. if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
  435. if (r_img->detect_signed()) {
  436. result = betsy->compress(BETSY_FORMAT_BC6_SIGNED, r_img);
  437. } else {
  438. result = betsy->compress(BETSY_FORMAT_BC6_UNSIGNED, r_img);
  439. }
  440. }
  441. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  442. free_device();
  443. }
  444. return result;
  445. }
  446. Error _betsy_compress_s3tc(Image *r_img, Image::UsedChannels p_channels) {
  447. ensure_betsy_exists();
  448. Error result = ERR_UNAVAILABLE;
  449. switch (p_channels) {
  450. case Image::USED_CHANNELS_RGB:
  451. case Image::USED_CHANNELS_L:
  452. result = betsy->compress(BETSY_FORMAT_BC1, r_img);
  453. break;
  454. case Image::USED_CHANNELS_R:
  455. result = betsy->compress(BETSY_FORMAT_BC4_UNSIGNED, r_img);
  456. break;
  457. default:
  458. break;
  459. }
  460. if (!GLOBAL_GET("rendering/textures/vram_compression/cache_gpu_compressor")) {
  461. free_device();
  462. }
  463. return result;
  464. }
  465. void free_device() {
  466. if (betsy != nullptr) {
  467. betsy->finish();
  468. memdelete(betsy);
  469. }
  470. }