123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437 |
- // ======================================================================== //
- // Copyright 2009-2019 Intel Corporation //
- // //
- // Licensed under the Apache License, Version 2.0 (the "License"); //
- // you may not use this file except in compliance with the License. //
- // You may obtain a copy of the License at //
- // //
- // http://www.apache.org/licenses/LICENSE-2.0 //
- // //
- // Unless required by applicable law or agreed to in writing, software //
- // distributed under the License is distributed on an "AS IS" BASIS, //
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
- // See the License for the specific language governing permissions and //
- // limitations under the License. //
- // ======================================================================== //
- #include "upsample.h"
- #include "weights_reorder.h"
- #include "network.h"
- // -- GODOT start --
- #include <cstring>
- // -- GODOT end --
- namespace oidn {
- template<int K>
- Network<K>::Network(const Ref<Device>& device, const std::map<std::string, Tensor>& weightMap)
- : device(device),
- eng(engine::cpu, 0),
- sm(eng),
- weightMap(weightMap)
- {
- }
- template<int K>
- void Network<K>::execute(const Progress& progress, int taskIndex)
- {
- if (progress.func)
- {
- const double value = double(taskIndex) / double(progress.taskCount);
- if (!progress.func(progress.userPtr, value))
- throw Exception(Error::Cancelled, "execution was cancelled");
- }
- for (size_t i = 0; i < nodes.size(); ++i)
- {
- nodes[i]->execute(sm);
- if (progress.func)
- {
- const double value = (double(taskIndex) + double(i+1) / double(nodes.size())) / double(progress.taskCount);
- if (!progress.func(progress.userPtr, value))
- throw Exception(Error::Cancelled, "execution was cancelled");
- }
- }
- }
- template<int K>
- std::shared_ptr<memory> Network<K>::allocTensor(const memory::dims& dims,
- memory::format_tag format,
- void* data)
- {
- if (format == memory::format_tag::any)
- {
- if (dims.size() == 4)
- format = BlockedFormat<K>::nChwKc;
- else if (dims.size() == 1)
- format = memory::format_tag::x;
- else
- assert(0);
- }
- memory::desc desc(dims, memory::data_type::f32, format);
- if (data == nullptr)
- {
- const size_t bytes = getTensorSize(dims) * sizeof(float);
- if (format == BlockedFormat<K>::nChwKc)
- activationAllocBytes += bytes;
- totalAllocBytes += bytes;
- return std::make_shared<memory>(desc, eng);
- }
- else
- {
- return std::make_shared<memory>(desc, eng, data);
- }
- }
- template<int K>
- std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
- const std::shared_ptr<memory>& src,
- size_t srcOffset,
- memory::format_tag format)
- {
- const mkldnn_memory_desc_t& srcDesc = src->get_desc().data;
- MAYBE_UNUSED(srcDesc);
- assert(srcDesc.data_type == memory::data_type::f32);
- assert(getTensorSize(src) >= srcOffset + getTensorSize(dims));
- if (format == memory::format_tag::any)
- {
- if (dims.size() == 4)
- format = BlockedFormat<K>::nChwKc;
- else if (dims.size() == 1)
- format = memory::format_tag::x;
- else
- assert(0);
- }
- memory::desc desc(dims, memory::data_type::f32, format);
- float* srcPtr = (float*)src->get_data_handle() + srcOffset;
- return std::make_shared<memory>(desc, eng, srcPtr);
- }
- template<int K>
- std::shared_ptr<memory> Network<K>::castTensor(const memory::dims& dims,
- const std::shared_ptr<memory>& src,
- const memory::dims& srcOffset)
- {
- return castTensor(dims, src, getTensorSize(srcOffset));
- }
- template<int K>
- void Network<K>::zeroTensor(const std::shared_ptr<memory>& dst)
- {
- assert(getTensorType(dst) == memory::data_type::f32);
- memset(dst->get_data_handle(), 0, getTensorSize(dst)*sizeof(float));
- }
- template<int K>
- memory::dims Network<K>::getInputReorderDims(const memory::dims& srcDims, int alignment)
- {
- memory::dims dstDims = srcDims;
- dstDims[1] = getPadded<K>(srcDims[1]); // round up C
- dstDims[2] = roundUp(srcDims[2], memory::dim(alignment)); // round up H
- dstDims[3] = roundUp(srcDims[3], memory::dim(alignment)); // round up W
- return dstDims;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addInputReorder(const Image& color,
- const Image& albedo,
- const Image& normal,
- const std::shared_ptr<TransferFunction>& transferFunc,
- int alignment,
- const std::shared_ptr<memory>& userDst)
- {
- assert(color);
- int inputC = 3;
- if (albedo) inputC += 3;
- if (normal) inputC += 3;
- memory::dims srcDims = {1, inputC, color.height, color.width};
- memory::dims dstDims = getInputReorderDims(srcDims, alignment);
- // Allocate padded memory
- auto dst = userDst;
- if (!dst)
- dst = allocTensor(dstDims);
- // Push node
- std::shared_ptr<Node> node;
- if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
- node = std::make_shared<InputReorderNode<K, LinearTransferFunction>>(color, albedo, normal, dst, tf);
- else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
- node = std::make_shared<InputReorderNode<K, GammaTransferFunction>>(color, albedo, normal, dst, tf);
- else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
- node = std::make_shared<InputReorderNode<K, LogTransferFunction>>(color, albedo, normal, dst, tf);
- else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
- node = std::make_shared<InputReorderNode<K, PQXTransferFunction>>(color, albedo, normal, dst, tf);
- else
- assert(0);
- nodes.push_back(node);
- return node;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addOutputReorder(const std::shared_ptr<memory>& src,
- const std::shared_ptr<TransferFunction>& transferFunc,
- const Image& output)
- {
- memory::dims srcDims = getTensorDims(src);
- assert(srcDims[1] == K);
- // Push node
- std::shared_ptr<Node> node;
- if (auto tf = std::dynamic_pointer_cast<LinearTransferFunction>(transferFunc))
- node = std::make_shared<OutputReorderNode<K, LinearTransferFunction>>(src, output, tf);
- else if (auto tf = std::dynamic_pointer_cast<GammaTransferFunction>(transferFunc))
- node = std::make_shared<OutputReorderNode<K, GammaTransferFunction>>(src, output, tf);
- else if (auto tf = std::dynamic_pointer_cast<LogTransferFunction>(transferFunc))
- node = std::make_shared<OutputReorderNode<K, LogTransferFunction>>(src, output, tf);
- else if (auto tf = std::dynamic_pointer_cast<PQXTransferFunction>(transferFunc))
- node = std::make_shared<OutputReorderNode<K, PQXTransferFunction>>(src, output, tf);
- else
- assert(0);
- nodes.push_back(node);
- return node;
- }
- template<int K>
- memory::dims Network<K>::getConvDims(const std::string& name, const memory::dims& srcDims)
- {
- auto b = weightMap[name + "/b"];
- memory::dims dstDims = srcDims;
- dstDims[1] = getPadded<K>(b.dims[0]); // dstDims[C] = getPadded(OC)
- return dstDims;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addConv(const std::string& name,
- const std::shared_ptr<memory>& src,
- const std::shared_ptr<memory>& userDst,
- bool relu)
- {
- const memory::dims strides = {1, 1};
- const memory::dims padding = {1, 1};
- memory::dims srcDims = getTensorDims(src);
- // Get the weights
- const auto& W = weightMap[name + "/W"];
- if (W.ndims() != 4 || W.format != "oihw")
- throw Exception(Error::InvalidOperation, "invalid convolution weights");
- memory::dims weightsDims = W.dims;
- auto userWeights = allocTensor(weightsDims, memory::format_tag::oihw, W.data);
- // Pad the weights
- memory::dims weightsPadDims = weightsDims;
- weightsPadDims[1] = getPadded<K>(weightsDims[1]); // IC
- weightsPadDims[0] = getPadded<K>(weightsDims[0]); // OC
- assert(srcDims[1] == weightsPadDims[1]); // srcDims[C] == weightsPadDims[IC]
- auto weightsPad = allocTensor(weightsPadDims, memory::format_tag::oihw);
- WeightsReorderNode<K>(userWeights, weightsPad).execute(sm);
- // Get the biases
- const auto& b = weightMap[name + "/b"];
- if (b.ndims() != 1)
- throw Exception(Error::InvalidOperation, "invalid convolution biases");
- memory::dims biasDims = b.dims;
- // Copy/pad the biases
- memory::dims biasPadDims = {getPadded<K>(biasDims[0])};
- auto bias = allocTensor(biasPadDims);
- if (biasDims[0] != biasPadDims[0])
- memset(bias->get_data_handle(), 0, biasPadDims[0]*sizeof(float));
- memcpy(bias->get_data_handle(), b.data, biasDims[0]*sizeof(float));
- // Allocate memory for destination
- memory::dims dstDims = srcDims;
- dstDims[1] = weightsPadDims[0]; // dstDims[C] = weightsPadDims[OC]
- std::shared_ptr<memory> dst;
- if (!userDst)
- dst = allocTensor(dstDims);
- else if (getTensorDims(userDst) == dstDims)
- dst = userDst;
- else
- dst = castTensor(dstDims, userDst);
- // Create a convolution
- // Let the convolution primitive choose the weights format
- auto weightsDesc = memory::desc({ weightsPadDims }, memory::data_type::f32, memory::format_tag::any);
- auto convAlgo = (K == 16) ? convolution_winograd : convolution_direct;
- auto convDesc = convolution_forward::desc(
- prop_kind::forward_inference, convAlgo,
- src->get_desc(),
- weightsDesc,
- bias->get_desc(),
- dst->get_desc(),
- strides, padding, padding, padding_kind::zero);
- // Incorporate relu
- mkldnn::primitive_attr convAttr;
- if (relu)
- {
- mkldnn::post_ops ops;
- ops.append_eltwise(
- 1.f, // scale factor, not used
- algorithm::eltwise_relu,
- 0.f, // max with
- 0.f // unused
- );
- convAttr.set_post_ops(ops);
- }
- convAttr.set_scratchpad_mode(scratchpad_mode_user);
- auto convPrimDesc = convolution_forward::primitive_desc(convDesc, convAttr, eng);
- // Reorder the weights to the final format, if necessary
- auto weights = weightsPad;
- if (convPrimDesc.weights_desc() != weightsPad->get_desc())
- {
- weights = std::make_shared<memory>(convPrimDesc.weights_desc(), eng);
- ReorderNode(weightsPad, weights).execute(sm);
- }
- // Create convolution node and add it to the net
- auto node = std::make_shared<ConvNode>(convPrimDesc, src, weights, bias, dst);
- nodes.push_back(node);
- return node;
- }
- template<int K>
- memory::dims Network<K>::getPoolDims(const memory::dims& srcDims)
- {
- memory::dims dstDims = srcDims;
- dstDims[2] /= 2; // H/2
- dstDims[3] /= 2; // W/2
- return dstDims;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addPool(const std::shared_ptr<memory>& src,
- const std::shared_ptr<memory>& userDst)
- {
- const memory::dims kernel = {2, 2};
- const memory::dims strides = {2, 2};
- const memory::dims padding = {0, 0};
- memory::dims srcDims = getTensorDims(src);
- memory::dims dstDims = getPoolDims(srcDims);
- std::shared_ptr<memory> dst;
- if (!userDst)
- dst = allocTensor(dstDims);
- else if (getTensorDims(userDst) == dstDims)
- dst = userDst;
- else
- dst = castTensor(dstDims, userDst);
- auto poolDesc = pooling_forward::desc(
- prop_kind::forward_inference, pooling_max,
- src->get_desc(),
- dst->get_desc(),
- strides, kernel, padding, padding, padding_kind::zero);
- mkldnn::primitive_attr poolAttr;
- poolAttr.set_scratchpad_mode(scratchpad_mode_user);
- auto poolPrimDesc = pooling_forward::primitive_desc(poolDesc, poolAttr, eng);
- auto node = std::make_shared<PoolNode>(poolPrimDesc, src, dst);
- nodes.push_back(node);
- return node;
- }
- template<int K>
- memory::dims Network<K>::getUpsampleDims(const memory::dims& srcDims)
- {
- memory::dims dstDims = srcDims;
- dstDims[2] *= 2; // H*2
- dstDims[3] *= 2; // W*2
- return dstDims;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addUpsample(const std::shared_ptr<memory>& src,
- const std::shared_ptr<memory>& userDst)
- {
- memory::dims srcDims = getTensorDims(src);
- memory::dims dstDims = getUpsampleDims(srcDims);
- std::shared_ptr<memory> dst;
- if (!userDst)
- dst = allocTensor(dstDims);
- else if (getTensorDims(userDst) == dstDims)
- dst = userDst;
- else
- dst = castTensor(dstDims, userDst);
- // Create upsampling node and add it to net
- auto node = std::make_shared<UpsampleNode<K>>(src, dst);
- nodes.push_back(node);
- return node;
- }
- template<int K>
- memory::dims Network<K>::getConcatDims(const memory::dims& src1Dims, const memory::dims& src2Dims)
- {
- assert(src1Dims[0] == src2Dims[0]); // N
- assert(src1Dims[2] == src2Dims[2]); // H
- assert(src1Dims[3] == src2Dims[3]); // W
- memory::dims dstDims = src1Dims;
- dstDims[1] += src2Dims[1]; // C
- return dstDims;
- }
- template<int K>
- std::shared_ptr<Node> Network<K>::addAutoexposure(const Image& color,
- const std::shared_ptr<HDRTransferFunction>& transferFunc)
- {
- auto node = std::make_shared<AutoexposureNode>(color, transferFunc);
- nodes.push_back(node);
- return node;
- }
- template <int K>
- void Network<K>::finalize()
- {
- // Compute the size of the scratchpad
- size_t scratchpadSize = 0;
- for (const auto& node : nodes)
- scratchpadSize = max(scratchpadSize, node->getScratchpadSize());
- // Allocate the scratchpad
- memory::dims scratchpadDims = { memory::dim(scratchpadSize) };
- memory::desc scratchpadDesc(scratchpadDims, memory::data_type::u8, memory::format_tag::x);
- auto scratchpad = std::make_shared<memory>(scratchpadDesc, eng);
- activationAllocBytes += scratchpadSize;
- totalAllocBytes += scratchpadSize;
- // Set the scratchpad for the nodes
- for (auto& node : nodes)
- node->setScratchpad(scratchpad);
- // Free the weights
- weightMap.clear();
- // Print statistics
- if (device->isVerbose(2))
- {
- std::cout << "Activation bytes: " << activationAllocBytes << std::endl;
- std::cout << "Scratchpad bytes: " << scratchpadSize << std::endl;
- std::cout << "Total bytes : " << totalAllocBytes << std::endl;
- }
- }
- // Explicit instantiations of Network for K = 8 and K = 16. The member
- // functions above are defined in this source file, so the instantiations
- // listed here are the ones this translation unit emits.
- template class Network<8>;
- template class Network<16>;
- } // namespace oidn
|