// network.cpp
// ======================================================================== //
// Copyright 2009-2019 Intel Corporation                                    //
//                                                                          //
// Licensed under the Apache License, Version 2.0 (the "License");          //
// you may not use this file except in compliance with the License.         //
// You may obtain a copy of the License at                                  //
//                                                                          //
//     http://www.apache.org/licenses/LICENSE-2.0                           //
//                                                                          //
// Unless required by applicable law or agreed to in writing, software      //
// distributed under the License is distributed on an "AS IS" BASIS,        //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and      //
// limitations under the License.                                           //
// ======================================================================== //
#include "upsample.h"
#include "weights_reorder.h"
#include "network.h"

// -- GODOT start --
#include <cstring>
// -- GODOT end --

namespace oidn {
  // Constructs an empty network: nodes are added later via the add*() methods
  // and executed by execute(). The weight map is stored by value so the
  // tensors remain available until finalize() clears them.
  template<int K>
  Network<K>::Network(const Ref<Device>& device, const std::map<std::string, Tensor>& weightMap)
    : device(device),
      eng(engine::cpu, 0), // single CPU engine, device index 0
      sm(eng),             // constructed on the engine; passed to nodes in execute()
      weightMap(weightMap)
  {
  }
  31. template<int K>
  32. void Network<K>::execute(const Progress& progress, int taskIndex)
  33. {
  34. if (progress.func)
  35. {
  36. const double value = double(taskIndex) / double(progress.taskCount);
  37. if (!progress.func(progress.userPtr, value))
  38. throw Exception(Error::Cancelled, "execution was cancelled");
  39. }
  40. for (size_t i = 0; i < nodes.size(); ++i)
  41. {
  42. nodes[i]->execute(sm);
  43. if (progress.func)
  44. {
  45. const double value = (double(taskIndex) + double(i+1) / double(nodes.size())) / double(progress.taskCount);
  46. if (!progress.func(progress.userPtr, value))
  47. throw Exception(Error::Cancelled, "execution was cancelled");
  48. }
  49. }
  50. }
  51. template<int K>
  52. std::shared_ptr<memory> Network<K>::allocTensor(const memory::dims& dims,
  53. memory::format_tag format,
  54. void* data)
  55. {
  56. if (format == memory::format_tag::any)
  57. {
  58. if (dims.size() == 4)
  59. format = BlockedFormat<K>::nChwKc;
  60. else if (dims.size() == 1)
  61. format = memory::format_tag::x;
  62. else
  63. assert(0);
  64. }
  65. memory::desc desc(dims, memory::data_type::f32, format);
  66. if (data == nullptr)
  67. {
  68. const size_t bytes = getTensorSize(dims) * sizeof(float);
  69. if (format == BlockedFormat<K>::nChwKc)
  70. activationAllocBytes += bytes;
  71. totalAllocBytes += bytes;
  72. return std::make_shared<memory>(desc, eng);
  73. }
  74. else
  75. {
  76. return std::make_shared<memory>(desc, eng, data);
  77. }
  78. }
  79. template<int K>
  80. std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
  81. const std::shared_ptr<memory>& src,
  82. size_t srcOffset,
  83. memory::format_tag format)
  84. {
  85. const mkldnn_memory_desc_t& srcDesc = src->get_desc().data;
  86. MAYBE_UNUSED(srcDesc);
  87. assert(srcDesc.data_type == memory::data_type::f32);
  88. assert(getTensorSize(src) >= srcOffset + getTensorSize(dims));
  89. if (format == memory::format_tag::any)
  90. {
  91. if (dims.size() == 4)
  92. format = BlockedFormat<K>::nChwKc;
  93. else if (dims.size() == 1)
  94. format = memory::format_tag::x;
  95. else
  96. assert(0);
  97. }
  98. memory::desc desc(dims, memory::data_type::f32, format);
  99. float* srcPtr = (float*)src->get_data_handle() + srcOffset;
  100. return std::make_shared<memory>(desc, eng, srcPtr);
  101. }
  102. template<int K>
  103. std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
  104. const std::shared_ptr<memory>& src,
  105. const memory::dims& srcOffset)
  106. {
  107. return castTensor(dims, src, getTensorSize(srcOffset));
  108. }
  109. template<int K>
  110. void Network<K>::zeroTensor(const std::shared_ptr<memory>& dst)
  111. {
  112. assert(getTensorType(dst) == memory::data_type::f32);
  113. memset(dst->get_data_handle(), 0, getTensorSize(dst)*sizeof(float));
  114. }
  115. template<int K>
  116. memory::dims Network<K>::getInputReorderDims(const memory::dims& srcDims, int alignment)
  117. {
  118. memory::dims dstDims = srcDims;
  119. dstDims[1] = getPadded<K>(srcDims[1]); // round up C
  120. dstDims[2] = roundUp(srcDims[2], memory::dim(alignment)); // round up H
  121. dstDims[3] = roundUp(srcDims[3], memory::dim(alignment)); // round up W
  122. return dstDims;
  123. }
  124. template<int K>
  125. std::shared_ptr<Node> Network<K>::addInputReorder(const Image& color,
  126. const Image& albedo,
  127. const Image& normal,
  128. const std::shared_ptr<TransferFunction>& transferFunc,
  129. int alignment,
  130. const std::shared_ptr<memory>& userDst)
  131. {
  132. assert(color);
  133. int inputC = 3;
  134. if (albedo) inputC += 3;
  135. if (normal) inputC += 3;
  136. memory::dims srcDims = {1, inputC, color.height, color.width};
  137. memory::dims dstDims = getInputReorderDims(srcDims, alignment);
  138. // Allocate padded memory
  139. auto dst = userDst;
  140. if (!dst)
  141. dst = allocTensor(dstDims);
  142. // Push node
  143. std::shared_ptr<Node> node;
  144. if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
  145. node = std::make_shared<InputReorderNode<K, LinearTransferFunction>>(color, albedo, normal, dst, tf);
  146. else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
  147. node = std::make_shared<InputReorderNode<K, GammaTransferFunction>>(color, albedo, normal, dst, tf);
  148. else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
  149. node = std::make_shared<InputReorderNode<K, LogTransferFunction>>(color, albedo, normal, dst, tf);
  150. else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
  151. node = std::make_shared<InputReorderNode<K, PQXTransferFunction>>(color, albedo, normal, dst, tf);
  152. else
  153. assert(0);
  154. nodes.push_back(node);
  155. return node;
  156. }
  157. template<int K>
  158. std::shared_ptr<Node> Network<K>::addOutputReorder(const std::shared_ptr<memory>& src,
  159. const std::shared_ptr<TransferFunction>& transferFunc,
  160. const Image& output)
  161. {
  162. memory::dims srcDims = getTensorDims(src);
  163. assert(srcDims[1] == K);
  164. // Push node
  165. std::shared_ptr<Node> node;
  166. if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
  167. node = std::make_shared<OutputReorderNode<K, LinearTransferFunction>>(src, output, tf);
  168. else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
  169. node = std::make_shared<OutputReorderNode<K, GammaTransferFunction>>(src, output, tf);
  170. else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
  171. node = std::make_shared<OutputReorderNode<K, LogTransferFunction>>(src, output, tf);
  172. else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
  173. node = std::make_shared<OutputReorderNode<K, PQXTransferFunction>>(src, output, tf);
  174. else
  175. assert(0);
  176. nodes.push_back(node);
  177. return node;
  178. }
  179. template<int K>
  180. memory::dims Network<K>::getConvDims(const std::string& name, const memory::dims& srcDims)
  181. {
  182. auto b = weightMap[name + "/b"];
  183. memory::dims dstDims = srcDims;
  184. dstDims[1] = getPadded<K>(b.dims[0]); // dstDims[C] = getPadded(OC)
  185. return dstDims;
  186. }
  187. template<int K>
  188. std::shared_ptr<Node> Network<K>::addConv(const std::string& name,
  189. const std::shared_ptr<memory>& src,
  190. const std::shared_ptr<memory>& userDst,
  191. bool relu)
  192. {
  193. const memory::dims strides = {1, 1};
  194. const memory::dims padding = {1, 1};
  195. memory::dims srcDims = getTensorDims(src);
  196. // Get the weights
  197. const auto& W = weightMap[name + "/W"];
  198. if (W.ndims() != 4 || W.format != "oihw")
  199. throw Exception(Error::InvalidOperation, "invalid convolution weights");
  200. memory::dims weightsDims = W.dims;
  201. auto userWeights = allocTensor(weightsDims, memory::format_tag::oihw, W.data);
  202. // Pad the weights
  203. memory::dims weightsPadDims = weightsDims;
  204. weightsPadDims[1] = getPadded<K>(weightsDims[1]); // IC
  205. weightsPadDims[0] = getPadded<K>(weightsDims[0]); // OC
  206. assert(srcDims[1] == weightsPadDims[1]); // srcDims[C] == weightsPadDims[IC]
  207. auto weightsPad = allocTensor(weightsPadDims, memory::format_tag::oihw);
  208. WeightsReorderNode<K>(userWeights, weightsPad).execute(sm);
  209. // Get the biases
  210. const auto& b = weightMap[name + "/b"];
  211. if (b.ndims() != 1)
  212. throw Exception(Error::InvalidOperation, "invalid convolution biases");
  213. memory::dims biasDims = b.dims;
  214. // Copy/pad the biases
  215. memory::dims biasPadDims = {getPadded<K>(biasDims[0])};
  216. auto bias = allocTensor(biasPadDims);
  217. if (biasDims[0] != biasPadDims[0])
  218. memset(bias->get_data_handle(), 0, biasPadDims[0]*sizeof(float));
  219. memcpy(bias->get_data_handle(), b.data, biasDims[0]*sizeof(float));
  220. // Allocate memory for destination
  221. memory::dims dstDims = srcDims;
  222. dstDims[1] = weightsPadDims[0]; // dstDims[C] = weightsPadDims[OC]
  223. std::shared_ptr<memory> dst;
  224. if (!userDst)
  225. dst = allocTensor(dstDims);
  226. else if (getTensorDims(userDst) == dstDims)
  227. dst = userDst;
  228. else
  229. dst = castTensor(dstDims, userDst);
  230. // Create a convolution
  231. // Let the convolution primitive choose the weights format
  232. auto weightsDesc = memory::desc({ weightsPadDims }, memory::data_type::f32, memory::format_tag::any);
  233. auto convAlgo = (K == 16) ? convolution_winograd : convolution_direct;
  234. auto convDesc = convolution_forward::desc(
  235. prop_kind::forward_inference, convAlgo,
  236. src->get_desc(),
  237. weightsDesc,
  238. bias->get_desc(),
  239. dst->get_desc(),
  240. strides, padding, padding, padding_kind::zero);
  241. // Incorporate relu
  242. mkldnn::primitive_attr convAttr;
  243. if (relu)
  244. {
  245. mkldnn::post_ops ops;
  246. ops.append_eltwise(
  247. 1.f, // scale factor, not used
  248. algorithm::eltwise_relu,
  249. 0.f, // max with
  250. 0.f // unused
  251. );
  252. convAttr.set_post_ops(ops);
  253. }
  254. convAttr.set_scratchpad_mode(scratchpad_mode_user);
  255. auto convPrimDesc = convolution_forward::primitive_desc(convDesc, convAttr, eng);
  256. // Reorder the weights to the final format, if necessary
  257. auto weights = weightsPad;
  258. if (convPrimDesc.weights_desc() != weightsPad->get_desc())
  259. {
  260. weights = std::make_shared<memory>(convPrimDesc.weights_desc(), eng);
  261. ReorderNode(weightsPad, weights).execute(sm);
  262. }
  263. // Create convolution node and add it to the net
  264. auto node = std::make_shared<ConvNode>(convPrimDesc, src, weights, bias, dst);
  265. nodes.push_back(node);
  266. return node;
  267. }
  268. template<int K>
  269. memory::dims Network<K>::getPoolDims(const memory::dims& srcDims)
  270. {
  271. memory::dims dstDims = srcDims;
  272. dstDims[2] /= 2; // H/2
  273. dstDims[3] /= 2; // W/2
  274. return dstDims;
  275. }
  276. template<int K>
  277. std::shared_ptr<Node> Network<K>::addPool(const std::shared_ptr<memory>& src,
  278. const std::shared_ptr<memory>& userDst)
  279. {
  280. const memory::dims kernel = {2, 2};
  281. const memory::dims strides = {2, 2};
  282. const memory::dims padding = {0, 0};
  283. memory::dims srcDims = getTensorDims(src);
  284. memory::dims dstDims = getPoolDims(srcDims);
  285. std::shared_ptr<memory> dst;
  286. if (!userDst)
  287. dst = allocTensor(dstDims);
  288. else if (getTensorDims(userDst) == dstDims)
  289. dst = userDst;
  290. else
  291. dst = castTensor(dstDims, userDst);
  292. auto poolDesc = pooling_forward::desc(
  293. prop_kind::forward_inference, pooling_max,
  294. src->get_desc(),
  295. dst->get_desc(),
  296. strides, kernel, padding, padding, padding_kind::zero);
  297. mkldnn::primitive_attr poolAttr;
  298. poolAttr.set_scratchpad_mode(scratchpad_mode_user);
  299. auto poolPrimDesc = pooling_forward::primitive_desc(poolDesc, poolAttr, eng);
  300. auto node = std::make_shared<PoolNode>(poolPrimDesc, src, dst);
  301. nodes.push_back(node);
  302. return node;
  303. }
  304. template<int K>
  305. memory::dims Network<K>::getUpsampleDims(const memory::dims& srcDims)
  306. {
  307. memory::dims dstDims = srcDims;
  308. dstDims[2] *= 2; // H*2
  309. dstDims[3] *= 2; // W*2
  310. return dstDims;
  311. }
  312. template<int K>
  313. std::shared_ptr<Node> Network<K>::addUpsample(const std::shared_ptr<memory>& src,
  314. const std::shared_ptr<memory>& userDst)
  315. {
  316. memory::dims srcDims = getTensorDims(src);
  317. memory::dims dstDims = getUpsampleDims(srcDims);
  318. std::shared_ptr<memory> dst;
  319. if (!userDst)
  320. dst = allocTensor(dstDims);
  321. else if (getTensorDims(userDst) == dstDims)
  322. dst = userDst;
  323. else
  324. dst = castTensor(dstDims, userDst);
  325. // Create upsampling node and add it to net
  326. auto node = std::make_shared<UpsampleNode<K>>(src, dst);
  327. nodes.push_back(node);
  328. return node;
  329. }
  330. template<int K>
  331. memory::dims Network<K>::getConcatDims(const memory::dims& src1Dims, const memory::dims& src2Dims)
  332. {
  333. assert(src1Dims[0] == src2Dims[0]); // N
  334. assert(src1Dims[2] == src2Dims[2]); // H
  335. assert(src1Dims[3] == src2Dims[3]); // W
  336. memory::dims dstDims = src1Dims;
  337. dstDims[1] += src2Dims[1]; // C
  338. return dstDims;
  339. }
  340. template<int K>
  341. std::shared_ptr<Node> Network<K>::addAutoexposure(const Image& color,
  342. const std::shared_ptr<HDRTransferFunction>& transferFunc)
  343. {
  344. auto node = std::make_shared<AutoexposureNode>(color, transferFunc);
  345. nodes.push_back(node);
  346. return node;
  347. }
  348. template <int K>
  349. void Network<K>::finalize()
  350. {
  351. // Compute the size of the scratchpad
  352. size_t scratchpadSize = 0;
  353. for (const auto& node : nodes)
  354. scratchpadSize = max(scratchpadSize, node->getScratchpadSize());
  355. // Allocate the scratchpad
  356. memory::dims scratchpadDims = { memory::dim(scratchpadSize) };
  357. memory::desc scratchpadDesc(scratchpadDims, memory::data_type::u8, memory::format_tag::x);
  358. auto scratchpad = std::make_shared<memory>(scratchpadDesc, eng);
  359. activationAllocBytes += scratchpadSize;
  360. totalAllocBytes += scratchpadSize;
  361. // Set the scratchpad for the nodes
  362. for (auto& node : nodes)
  363. node->setScratchpad(scratchpad);
  364. // Free the weights
  365. weightMap.clear();
  366. // Print statistics
  367. if (device->isVerbose(2))
  368. {
  369. std::cout << "Activation bytes: " << activationAllocBytes << std::endl;
  370. std::cout << "Scratchpad bytes: " << scratchpadSize << std::endl;
  371. std::cout << "Total bytes : " << totalAllocBytes << std::endl;
  372. }
  373. }
  // Explicit template instantiations for the two supported block sizes
  template class Network<8>;
  template class Network<16>;
} // namespace oidn