123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 |
- /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
- /* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
- #include "WebGLElementArrayCache.h"
- #include <algorithm>
- #include <cstdlib>
- #include <cstring>
- #include <limits>
- #include "mozilla/Assertions.h"
- #include "mozilla/MathAlgorithms.h"
- #include "mozilla/MemoryReporting.h"
- namespace mozilla {
- /* WebGLElementArrayCacheTree contains most of the implementation of
- * WebGLElementArrayCache, which performs WebGL element array buffer validation
- * for drawElements.
- *
- * Attention: Here lie nontrivial data structures, bug-prone algorithms, and
- * non-canonical tweaks! Whence the explanatory comments, and compiled unit
- * test.
- *
- * *** What problem are we solving here? ***
- *
- * WebGL::DrawElements has to validate that the elements are in range wrt the
- * current vertex attribs. This boils down to the problem, given an array of
- * integers, of computing the maximum in an arbitrary sub-array. The naive
- * algorithm has linear complexity; this has been a major performance problem,
- * see bug 569431. In that bug, we took the approach of caching the max for the
- * whole array, which does cover most cases (DrawElements typically consumes the
- * whole element array buffer) but doesn't help in other use cases:
- * - when doing "partial DrawElements" i.e. consuming only part of the element
- * array buffer
- * - when doing frequent "partial buffer updates" i.e. bufferSubData calls
- * updating parts of the element array buffer
- *
- * *** The solution: A binary tree ***
- *
- * The solution implemented here is to use a binary tree as the cache data
- * structure. Each tree node contains the max of its two children nodes. In this
- * way, finding the maximum in any contiguous sub-array has log complexity
- * instead of linear complexity.
- *
- * Simplistically, if the element array is:
- *
- * [1 4 3 2]
- *
- * then the corresponding tree is:
- *
- * 4
- * _/ \_
- * 4 3
- * / \ / \
- * 1 4 3 2
- *
- * In practice, the bottom-most levels of the tree are both the largest to store
- * (because they have more nodes), and the least useful performance-wise
- * (because each node in the bottom levels concerns only few entries in the
- * elements array buffer, it is cheap to compute).
- *
- * For this reason, we stop the tree a few levels above, so that each tree leaf
- * actually corresponds to more than one element array entry.
- *
- * The number of levels that we "drop" is |kSkippedBottomTreeLevels| and the
- * number of element array entries that each leaf corresponds to, is
- * |kElementsPerLeaf|. This being a binary tree, we have:
- *
- * kElementsPerLeaf = 2 ^ kSkippedBottomTreeLevels.
- *
- * *** Storage layout of the binary tree ***
- *
- * We take advantage of the specifics of the situation to avoid generalist tree
- * storage and instead store the tree entries in a vector, mTreeData.
- *
- * TreeData is always a vector of length:
- *
- * 2 * (number of leaves).
- *
- * Its data layout is as follows: mTreeData[0] is unused, mTreeData[1] is the
- * root node, then at offsets 2..3 is the tree level immediately below the root
- * node, then at offsets 4..7 is the tree level below that, etc.
- *
- * The figure below illustrates this by writing at each tree node the offset
- * into mTreeData at which it is stored:
- *
- * 1
- * _/ \_
- * 2 3
- * / \ / \
- * 4 5 6 7
- * ...
- *
- * Thus, under the convention that the root level is level 0, we see that level
- * N is stored at offsets:
- *
- * [ 2^n .. 2^(n+1) - 1 ]
- *
- * in mTreeData. Likewise, all the usual tree operations have simple
- * mathematical expressions in terms of mTreeData offsets, see all the methods
- * such as ParentNode, LeftChildNode, etc.
- *
- * *** Design constraint: Element types aren't known at buffer-update time ***
- *
- * Note that a key constraint that we're operating under, is that we don't know
- * the types of the elements by the time WebGL bufferData/bufferSubData methods
- * are called. The type of elements is only specified in the drawElements call.
- * This means that we may potentially have to store caches for multiple element
- * types, for the same element array buffer. Since we don't know yet how many
- * element types we'll eventually support (extensions add more), the concern
- * about memory usage is serious. This is addressed by kSkippedBottomTreeLevels
- * as explained above. Of course, in the typical case where each element array
- * buffer is only ever used with one type, this is also addressed by having
- * WebGLElementArrayCache lazily create trees for each type only upon first use.
- *
- * Another consequence of this constraint is that when updating the trees, we
- * have to update all existing trees. So if trees for types uint8_t, uint16_t
- * and uint32_t have ever been constructed for this buffer, every subsequent
- * update will have to update all trees even if one of the types is never used
- * again. That's inefficient, but content should not put indices of different
- * types in the same element array buffer anyways. Different index types can
- * only be consumed in separate drawElements calls, so nothing particular is
- * to be achieved by lumping them in the same buffer object.
- */
- template<typename T>
- struct WebGLElementArrayCacheTree
- {
- /* A too-high kSkippedBottomTreeLevels would harm the performance of small
- * drawElements calls. A too-low kSkippedBottomTreeLevels would cause undue
- * memory usage. The current value has been validated by some benchmarking.
- * See bug 732660.
- */
- static const size_t kSkippedBottomTreeLevels = 3;
- static const size_t kElementsPerLeaf = 1 << kSkippedBottomTreeLevels;
- // Since kElementsPerLeaf is POT:
- static const size_t kElementsPerLeafMask = kElementsPerLeaf - 1;
- private:
- // The WebGLElementArrayCache that owns this tree:
- WebGLElementArrayCache& mParent;
- // The tree's internal data storage. Its length is 2 * (number of leaves)
- // because of its data layout explained in the above class comment.
- FallibleTArray<T> mTreeData;
- public:
- // Constructor. Takes a reference to the WebGLElementArrayCache that is to be
- // the parent. Does not initialize the tree. Should be followed by a call
- // to Update() to attempt initializing the tree.
- explicit WebGLElementArrayCacheTree(WebGLElementArrayCache& value)
- : mParent(value)
- {
- }
- T GlobalMaximum() const {
- return mTreeData[1];
- }
- // returns the index of the parent node; if treeIndex=1 (the root node),
- // the return value is 0.
- static size_t ParentNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex > 1);
- return treeIndex >> 1;
- }
- static bool IsRightNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex > 1);
- return treeIndex & 1;
- }
- static bool IsLeftNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex > 1);
- return !IsRightNode(treeIndex);
- }
- static size_t SiblingNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex > 1);
- return treeIndex ^ 1;
- }
- static size_t LeftChildNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex);
- return treeIndex << 1;
- }
- static size_t RightChildNode(size_t treeIndex) {
- MOZ_ASSERT(treeIndex);
- return SiblingNode(LeftChildNode(treeIndex));
- }
- static size_t LeftNeighborNode(size_t treeIndex, size_t distance = 1) {
- MOZ_ASSERT(treeIndex > 1);
- return treeIndex - distance;
- }
- static size_t RightNeighborNode(size_t treeIndex, size_t distance = 1) {
- MOZ_ASSERT(treeIndex > 1);
- return treeIndex + distance;
- }
- size_t NumLeaves() const {
- // See class comment for why we the tree storage size is 2 * numLeaves.
- return mTreeData.Length() >> 1;
- }
- size_t LeafForElement(size_t element) const {
- size_t leaf = element / kElementsPerLeaf;
- MOZ_ASSERT(leaf < NumLeaves());
- return leaf;
- }
- size_t LeafForByte(size_t byte) const {
- return LeafForElement(byte / sizeof(T));
- }
- // Returns the index, into the tree storage, where a given leaf is stored.
- size_t TreeIndexForLeaf(size_t leaf) const {
- // See above class comment. The tree storage is an array of length
- // 2 * numLeaves. The leaves are stored in its second half.
- return leaf + NumLeaves();
- }
- static size_t LastElementUnderSameLeaf(size_t element) {
- return element | kElementsPerLeafMask;
- }
- static size_t FirstElementUnderSameLeaf(size_t element) {
- return element & ~kElementsPerLeafMask;
- }
- static size_t NextMultipleOfElementsPerLeaf(size_t numElements) {
- MOZ_ASSERT(numElements >= 1);
- return ((numElements - 1) | kElementsPerLeafMask) + 1;
- }
- bool Validate(T maxAllowed, size_t firstLeaf, size_t lastLeaf)
- {
- size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
- size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
- while (true) {
- // Given that we tweak these values in nontrivial ways, it doesn't
- // hurt to do this sanity check.
- MOZ_ASSERT(firstTreeIndex <= lastTreeIndex);
- // Final case where there is only one node to validate at the
- // current tree level:
- if (lastTreeIndex == firstTreeIndex) {
- const T& curData = mTreeData[firstTreeIndex];
- return curData <= maxAllowed;
- }
- // If the first node at current tree level is a right node, handle
- // it individually and replace it with its right neighbor, which is
- // a left node.
- if (IsRightNode(firstTreeIndex)) {
- const T& curData = mTreeData[firstTreeIndex];
- if (curData > maxAllowed)
- return false;
- firstTreeIndex = RightNeighborNode(firstTreeIndex);
- }
- // If the last node at current tree level is a left node, handle it
- // individually and replace it with its left neighbor, which is a
- // right node.
- if (IsLeftNode(lastTreeIndex)) {
- const T& curData = mTreeData[lastTreeIndex];
- if (curData > maxAllowed)
- return false;
- lastTreeIndex = LeftNeighborNode(lastTreeIndex);
- }
- /* At this point it can happen that firstTreeIndex and lastTreeIndex
- * "crossed" eachother. That happens if firstTreeIndex was a right
- * node and lastTreeIndex was its right neighor: In that case, both
- * above tweaks happened and as a result, they ended up being
- * swapped: LastTreeIndex is now the _left_ neighbor of
- * firstTreeIndex. When that happens, there is nothing left to
- * validate.
- */
- if (lastTreeIndex == LeftNeighborNode(firstTreeIndex))
- return true;
- // Walk up one level.
- firstTreeIndex = ParentNode(firstTreeIndex);
- lastTreeIndex = ParentNode(lastTreeIndex);
- }
- }
- // Updates the tree from the parent's buffer contents. Fallible, as it
- // may have to resize the tree storage.
- bool Update(size_t firstByte, size_t lastByte);
- size_t SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const
- {
- return mallocSizeOf(this) +
- mTreeData.ShallowSizeOfExcludingThis(mallocSizeOf);
- }
- };
- // TreeForType: just a template helper to select the right tree object for a given
- // element type.
- template<typename T>
- struct TreeForType {};
- template<>
- struct TreeForType<uint8_t>
- {
- static UniquePtr<WebGLElementArrayCacheTree<uint8_t>>&
- Value(WebGLElementArrayCache* b) {
- return b->mUint8Tree;
- }
- };
- template<>
- struct TreeForType<uint16_t>
- {
- static UniquePtr<WebGLElementArrayCacheTree<uint16_t>>&
- Value(WebGLElementArrayCache* b) {
- return b->mUint16Tree;
- }
- };
- template<>
- struct TreeForType<uint32_t>
- {
- static UniquePtr<WebGLElementArrayCacheTree<uint32_t>>&
- Value(WebGLElementArrayCache* b) {
- return b->mUint32Tree;
- }
- };
- // Calling this method will 1) update the leaves in this interval
- // from the raw buffer data, and 2) propagate this update up the tree.
- template<typename T>
- bool
- WebGLElementArrayCacheTree<T>::Update(size_t firstByte, size_t lastByte)
- {
- MOZ_ASSERT(firstByte <= lastByte);
- MOZ_ASSERT(lastByte < mParent.mBytes.Length());
- size_t numberOfElements = mParent.mBytes.Length() / sizeof(T);
- size_t requiredNumLeaves = 0;
- if (numberOfElements > 0) {
- /* If we didn't require the number of leaves to be a power of two, then
- * it would just be equal to
- *
- * ceil(numberOfElements / kElementsPerLeaf)
- *
- * The way we implement this (division+ceil) operation in integer
- * arithmetic
- * is as follows:
- */
- size_t numLeavesNonPOT = (numberOfElements + kElementsPerLeaf - 1) / kElementsPerLeaf;
- // It only remains to round that up to the next power of two:
- requiredNumLeaves = RoundUpPow2(numLeavesNonPOT);
- }
- // Step #0: If needed, resize our tree data storage.
- if (requiredNumLeaves != NumLeaves()) {
- // See class comment for why we the tree storage size is 2 * numLeaves.
- if (!mTreeData.SetLength(2 * requiredNumLeaves, fallible)) {
- mTreeData.Clear();
- return false;
- }
- MOZ_ASSERT(NumLeaves() == requiredNumLeaves);
- if (NumLeaves()) {
- // When resizing, update the whole tree, not just the subset
- // corresponding to the part of the buffer being updated.
- memset(mTreeData.Elements(), 0, mTreeData.Length() * sizeof(T));
- firstByte = 0;
- lastByte = mParent.mBytes.Length() - 1;
- }
- }
- if (NumLeaves() == 0)
- return true;
- lastByte = std::min(lastByte, NumLeaves() * kElementsPerLeaf * sizeof(T) - 1);
- if (firstByte > lastByte)
- return true;
- size_t firstLeaf = LeafForByte(firstByte);
- size_t lastLeaf = LeafForByte(lastByte);
- MOZ_ASSERT(firstLeaf <= lastLeaf && lastLeaf < NumLeaves());
- size_t firstTreeIndex = TreeIndexForLeaf(firstLeaf);
- size_t lastTreeIndex = TreeIndexForLeaf(lastLeaf);
- // Step #1: Initialize the tree leaves from plain buffer data.
- // That is, each tree leaf must be set to the max of the |kElementsPerLeaf|
- // corresponding buffer entries.
- // Condition-less scope to prevent leaking this scope's variables into the
- // code below:
- {
- // TreeIndex is the index of the tree leaf we're writing, i.e. the
- // destination index.
- size_t treeIndex = firstTreeIndex;
- // srcIndex is the index in the source buffer.
- size_t srcIndex = firstLeaf * kElementsPerLeaf;
- while (treeIndex <= lastTreeIndex) {
- T m = 0;
- size_t a = srcIndex;
- size_t srcIndexNextLeaf = std::min(a + kElementsPerLeaf, numberOfElements);
- for (; srcIndex < srcIndexNextLeaf; srcIndex++) {
- m = std::max(m, mParent.Element<T>(srcIndex));
- }
- mTreeData[treeIndex] = m;
- treeIndex++;
- }
- }
- // Step #2: Propagate the values up the tree. This is simply a matter of
- // walking up the tree and setting each node to the max of its two children.
- while (firstTreeIndex > 1) {
- // Move up one level.
- firstTreeIndex = ParentNode(firstTreeIndex);
- lastTreeIndex = ParentNode(lastTreeIndex);
- // Fast-exit case where only one node is updated at the current level.
- if (firstTreeIndex == lastTreeIndex) {
- mTreeData[firstTreeIndex] = std::max(mTreeData[LeftChildNode(firstTreeIndex)], mTreeData[RightChildNode(firstTreeIndex)]);
- continue;
- }
- size_t child = LeftChildNode(firstTreeIndex);
- size_t parent = firstTreeIndex;
- while (parent <= lastTreeIndex) {
- T a = mTreeData[child];
- child = RightNeighborNode(child);
- T b = mTreeData[child];
- child = RightNeighborNode(child);
- mTreeData[parent] = std::max(a, b);
- parent = RightNeighborNode(parent);
- }
- }
- return true;
- }
- WebGLElementArrayCache::WebGLElementArrayCache()
- {
- }
- WebGLElementArrayCache::~WebGLElementArrayCache()
- {
- }
- bool
- WebGLElementArrayCache::BufferData(const void* ptr, size_t byteLength)
- {
- if (mBytes.Length() != byteLength) {
- if (!mBytes.SetLength(byteLength, fallible)) {
- mBytes.Clear();
- return false;
- }
- }
- MOZ_ASSERT(mBytes.Length() == byteLength);
- return BufferSubData(0, ptr, byteLength);
- }
- bool
- WebGLElementArrayCache::BufferSubData(size_t pos, const void* ptr,
- size_t updateByteLength)
- {
- MOZ_ASSERT(pos + updateByteLength <= mBytes.Length());
- if (!updateByteLength)
- return true;
- // Note, using memcpy on shared racy data is not well-defined, this
- // will need to use safe-for-races operations when those become available.
- // See bug 1225033.
- if (ptr)
- memcpy(mBytes.Elements() + pos, ptr, updateByteLength);
- else
- memset(mBytes.Elements() + pos, 0, updateByteLength);
- return UpdateTrees(pos, pos + updateByteLength - 1);
- }
- bool
- WebGLElementArrayCache::UpdateTrees(size_t firstByte, size_t lastByte)
- {
- bool result = true;
- if (mUint8Tree)
- result &= mUint8Tree->Update(firstByte, lastByte);
- if (mUint16Tree)
- result &= mUint16Tree->Update(firstByte, lastByte);
- if (mUint32Tree)
- result &= mUint32Tree->Update(firstByte, lastByte);
- return result;
- }
- template<typename T>
- bool
- WebGLElementArrayCache::Validate(uint32_t maxAllowed, size_t firstElement,
- size_t countElements)
- {
- // If maxAllowed is >= the max T value, then there is no way that a T index
- // could be invalid.
- uint32_t maxTSize = std::numeric_limits<T>::max();
- if (maxAllowed >= maxTSize)
- return true;
- T maxAllowedT(maxAllowed);
- // Integer overflow must have been handled earlier, so we assert that
- // maxAllowedT is exactly the max allowed value.
- MOZ_ASSERT(uint32_t(maxAllowedT) == maxAllowed);
- if (!mBytes.Length() || !countElements)
- return true;
- UniquePtr<WebGLElementArrayCacheTree<T>>& tree = TreeForType<T>::Value(this);
- if (!tree) {
- tree = MakeUnique<WebGLElementArrayCacheTree<T>>(*this);
- if (mBytes.Length()) {
- bool valid = tree->Update(0, mBytes.Length() - 1);
- if (!valid) {
- // Do not assert here. This case would happen if an allocation
- // failed. We've already settled on fallible allocations around
- // here.
- tree = nullptr;
- return false;
- }
- }
- }
- size_t lastElement = firstElement + countElements - 1;
- // Fast-exit path when the global maximum for the whole element array buffer
- // falls in the allowed range:
- T globalMax = tree->GlobalMaximum();
- if (globalMax <= maxAllowedT)
- return true;
- const T* elements = Elements<T>();
- // Before calling tree->Validate, we have to validate ourselves the
- // boundaries of the elements span, to round them to the nearest multiple of
- // kElementsPerLeaf.
- size_t firstElementAdjustmentEnd = std::min(lastElement,
- tree->LastElementUnderSameLeaf(firstElement));
- while (firstElement <= firstElementAdjustmentEnd) {
- const T& curData = elements[firstElement];
- if (curData > maxAllowedT)
- return false;
- firstElement++;
- }
- size_t lastElementAdjustmentEnd = std::max(firstElement,
- tree->FirstElementUnderSameLeaf(lastElement));
- while (lastElement >= lastElementAdjustmentEnd) {
- const T& curData = elements[lastElement];
- if (curData > maxAllowedT)
- return false;
- lastElement--;
- }
- // at this point, for many tiny validations, we're already done.
- if (firstElement > lastElement)
- return true;
- // general case
- return tree->Validate(maxAllowedT, tree->LeafForElement(firstElement),
- tree->LeafForElement(lastElement));
- }
- bool
- WebGLElementArrayCache::Validate(GLenum type, uint32_t maxAllowed,
- size_t firstElement, size_t countElements)
- {
- if (type == LOCAL_GL_UNSIGNED_BYTE)
- return Validate<uint8_t>(maxAllowed, firstElement, countElements);
- if (type == LOCAL_GL_UNSIGNED_SHORT)
- return Validate<uint16_t>(maxAllowed, firstElement, countElements);
- if (type == LOCAL_GL_UNSIGNED_INT)
- return Validate<uint32_t>(maxAllowed, firstElement, countElements);
- MOZ_ASSERT(false, "Invalid type.");
- return false;
- }
- template<typename T>
- static size_t
- SizeOfNullable(mozilla::MallocSizeOf mallocSizeOf, const T& obj)
- {
- if (!obj)
- return 0;
- return obj->SizeOfIncludingThis(mallocSizeOf);
- }
- size_t
- WebGLElementArrayCache::SizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf) const
- {
- return mallocSizeOf(this) +
- mBytes.ShallowSizeOfExcludingThis(mallocSizeOf) +
- SizeOfNullable(mallocSizeOf, mUint8Tree) +
- SizeOfNullable(mallocSizeOf, mUint16Tree) +
- SizeOfNullable(mallocSizeOf, mUint32Tree);
- }
- bool
- WebGLElementArrayCache::BeenUsedWithMultipleTypes() const
- {
- // C++ Standard ($4.7)
- // "If the source type is bool, the value false is converted to zero and
- // the value true is converted to one."
- const int num_types_used = (mUint8Tree != nullptr) +
- (mUint16Tree != nullptr) +
- (mUint32Tree != nullptr);
- return num_types_used > 1;
- }
- } // end namespace mozilla
|