Compare commits

...

5 Commits

Author SHA1 Message Date
Mathias Agopian
0404be5127 new LinearAllocatorWithFallback
LinearAllocatorWithFallback is a linear allocator that can fall back
to the heap allocator. We use it for the high level command buffer to
avoid crashing when running out of memory.

FIXES=[277115740]
2024-02-21 14:52:04 -08:00
Mathias Agopian
a784fb7d8b Rework RenderPass to improve allocations and API
RenderPass now is a fully immutable object that gets constructed with a
RenderPassBuilder. RenderPassBuilder can be passed around and doesn't
do any (major) allocations.

All RenderPass allocations and heavy lifting are done in
RenderPassBuilder::build().

Additionally, RenderPass cannot be copied anymore.

Where allocations happen is now much clearer.
2024-02-21 14:51:57 -08:00
Mathias Agopian
8459cbc91c Fix a couple of threading vs. allocation issues
- prepareVisibleLights was run on a dedicated thread (via JobSystem), 
  but was using its own local ArenaScope. This is wrong because it
  could reset the root arena at any later point. This is fixed by
  just not using a local ArenaScope.

- related to the above, the root Arena (LinearAllocatorArena) didn't
  use a locked policy, which could also cause problems since some
  allocations are done off the main thread. We now pre-allocate the one
  buffer we need.

This PR also renames some variables and types to improve readability.
2024-02-21 14:51:51 -08:00
Mathias Agopian
344c04a2d8 cleanup CircularBuffer implementation and API
Also fix a bug in DEBUG mode that could corrupt the CircularBuffer. It
was caused by incorrect debugging code that attempted to clear the unused
area of the buffer (this was wrong because in "ashmem" mode, there are
no guaranteed unused areas).
2024-02-21 14:51:44 -08:00
Mathias Agopian
55e2691f3f Automatically flush CommandStream
When generating commands, we now automatically flush the CommandStream,
so that we're guaranteed to not overrun the circular buffer.
2024-02-21 14:51:36 -08:00
26 changed files with 861 additions and 527 deletions

View File

@@ -17,7 +17,10 @@
#ifndef TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
#define TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
#include <utils/debug.h>
#include <stddef.h>
#include <stdint.h>
namespace filament::backend {
@@ -37,28 +40,36 @@ public:
~CircularBuffer() noexcept;
// allocates 'size' bytes in the circular buffer and returns a pointer to the memory
// return the current head and moves it forward by size bytes
inline void* allocate(size_t size) noexcept {
static size_t getBlockSize() noexcept { return sPageSize; }
// Total size of circular buffer. This is a constant.
size_t size() const noexcept { return mSize; }
// Allocates `s` bytes in the circular buffer and returns a pointer to the memory. All
// allocations must not exceed size() bytes.
inline void* allocate(size_t s) noexcept {
// We can never allocate more than size().
assert_invariant(getUsed() + s <= size());
char* const cur = static_cast<char*>(mHead);
mHead = cur + size;
mHead = cur + s;
return cur;
}
// Total size of circular buffer
size_t size() const noexcept { return mSize; }
// returns true if the buffer is empty (e.g. after calling flush)
// Returns true if the buffer is empty, i.e.: no allocations were made since
// calling getBuffer();
bool empty() const noexcept { return mTail == mHead; }
void* getHead() const noexcept { return mHead; }
// Returns the size used since the last call to getBuffer()
size_t getUsed() const noexcept { return intptr_t(mHead) - intptr_t(mTail); }
void* getTail() const noexcept { return mTail; }
// call at least once every getRequiredSize() bytes allocated from the buffer
void circularize() noexcept;
static size_t getBlockSize() noexcept { return sPageSize; }
// Retrieves the current allocated range and frees it. It is the responsibility of the caller
// to make sure the returned range is no longer in use by the time allocate() allocates
// (size() - getUsed()) bytes.
// Pair of pointers delimiting one allocated range of the buffer, as returned by getBuffer().
struct Range {
// start of the range: the value of mTail at the time getBuffer() was called
void* tail;
// end of the range: the value of mHead at the time getBuffer() was called
void* head;
};
Range getBuffer() noexcept;
private:
void* alloc(size_t size) noexcept;
@@ -66,10 +77,10 @@ private:
// pointer to the beginning of the circular buffer (constant)
void* mData = nullptr;
int mUsesAshmem = -1;
int mAshmemFd = -1;
// size of the circular buffer (constant)
size_t mSize = 0;
size_t const mSize;
// pointer to the beginning of recorded data
void* mTail = nullptr;

View File

@@ -33,7 +33,7 @@ namespace filament::backend {
* A producer-consumer command queue that uses a CircularBuffer as main storage
*/
class CommandBufferQueue {
struct Slice {
struct Range {
void* begin;
void* end;
};
@@ -46,7 +46,7 @@ class CommandBufferQueue {
mutable utils::Mutex mLock;
mutable utils::Condition mCondition;
mutable std::vector<Slice> mCommandBuffersToExecute;
mutable std::vector<Range> mCommandBuffersToExecute;
size_t mFreeSpace = 0;
size_t mHighWatermark = 0;
uint32_t mExitRequested = 0;
@@ -58,17 +58,20 @@ public:
CommandBufferQueue(size_t requiredSize, size_t bufferSize);
~CommandBufferQueue();
CircularBuffer& getCircularBuffer() { return mCircularBuffer; }
CircularBuffer& getCircularBuffer() noexcept { return mCircularBuffer; }
CircularBuffer const& getCircularBuffer() const noexcept { return mCircularBuffer; }
size_t getCapacity() const noexcept { return mRequiredSize; }
size_t getHighWatermark() const noexcept { return mHighWatermark; }
// wait for commands to be available and returns an array containing these commands
std::vector<Slice> waitForCommands() const;
std::vector<Range> waitForCommands() const;
// return the memory used by this command buffer to the circular buffer
// WARNING: releaseBuffer() must be called in sequence of the Ranges returned by
// waitForCommands()
void releaseBuffer(Slice const& buffer);
void releaseBuffer(Range const& buffer);
// All command buffers (Ranges) written up to this point are returned by waitForCommands(). This
// call blocks until the CircularBuffer has at least mRequiredSize bytes available.

View File

@@ -213,6 +213,8 @@ public:
CommandStream(CommandStream const& rhs) noexcept = delete;
CommandStream& operator=(CommandStream const& rhs) noexcept = delete;
CircularBuffer const& getCircularBuffer() const noexcept { return mCurrentBuffer; }
public:
#define DECL_DRIVER_API(methodName, paramsDecl, params) \
inline void methodName(paramsDecl) { \

View File

@@ -231,7 +231,7 @@ private:
explicit Allocator(const utils::AreaPolicy::HeapArea& area);
// this is in fact always called with a constexpr size argument
[[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra) noexcept {
[[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra = 0) noexcept {
void* p = nullptr;
if (size <= mPool0.getSize()) p = mPool0.alloc(size, 16, extra);
else if (size <= mPool1.getSize()) p = mPool1.alloc(size, 16, extra);

View File

@@ -16,6 +16,14 @@
#include "private/backend/CircularBuffer.h"
#include <utils/Log.h>
#include <utils/Panic.h>
#include <utils/architecture.h>
#include <utils/ashmem.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/ostream.h>
#if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS)
# include <sys/mman.h>
# include <unistd.h>
@@ -24,23 +32,20 @@
# define HAS_MMAP 0
#endif
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <utils/architecture.h>
#include <utils/ashmem.h>
#include <utils/debug.h>
#include <utils/Log.h>
#include <utils/Panic.h>
using namespace utils;
namespace filament::backend {
size_t CircularBuffer::sPageSize = arch::getPageSize();
CircularBuffer::CircularBuffer(size_t size) {
CircularBuffer::CircularBuffer(size_t size)
: mSize(size) {
mData = alloc(size);
mSize = size;
mTail = mData;
mHead = mData;
}
@@ -85,7 +90,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
MAP_PRIVATE, fd, (off_t)size);
if (vaddr_guard != MAP_FAILED && (vaddr_guard == (char*)vaddr_shadow + size)) {
// woo-hoo success!
mUsesAshmem = fd;
mAshmemFd = fd;
data = vaddr;
}
}
@@ -93,7 +98,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
}
}
if (UTILS_UNLIKELY(mUsesAshmem < 0)) {
if (UTILS_UNLIKELY(mAshmemFd < 0)) {
// ashmem failed
if (vaddr_guard != MAP_FAILED) {
munmap(vaddr_guard, size);
@@ -137,9 +142,9 @@ void CircularBuffer::dealloc() noexcept {
if (mData) {
size_t const BLOCK_SIZE = getBlockSize();
munmap(mData, mSize * 2 + BLOCK_SIZE);
if (mUsesAshmem >= 0) {
close(mUsesAshmem);
mUsesAshmem = -1;
if (mAshmemFd >= 0) {
close(mAshmemFd);
mAshmemFd = -1;
}
}
#else
@@ -149,23 +154,37 @@ void CircularBuffer::dealloc() noexcept {
}
void CircularBuffer::circularize() noexcept {
if (mUsesAshmem > 0) {
intptr_t const overflow = intptr_t(mHead) - (intptr_t(mData) + ssize_t(mSize));
if (overflow >= 0) {
assert_invariant(size_t(overflow) <= mSize);
mHead = (void *) (intptr_t(mData) + overflow);
#ifndef NDEBUG
memset(mData, 0xA5, size_t(overflow));
#endif
}
} else {
// Only circularize if mHead is in the second buffer.
if (intptr_t(mHead) - intptr_t(mData) > ssize_t(mSize)) {
/*
 * Returns the range [mTail, mHead) as it is on entry and starts a new, empty range:
 * on return mTail == mHead, so empty() is true.
 *
 * If mHead has reached or passed the end of the first mSize bytes, it is brought back
 * into them before the new range starts. The range returned to the caller is the
 * untouched one captured on entry, so it may extend past mData + mSize.
 */
CircularBuffer::Range CircularBuffer::getBuffer() noexcept {
    // capture the current range before mHead / mTail are modified below
    Range const range{ .tail = mTail, .head = mHead };

    char* const pData = static_cast<char*>(mData);
    char const* const pEnd = pData + mSize;
    char const* const pHead = static_cast<char const*>(mHead);
    if (UTILS_UNLIKELY(pHead >= pEnd)) {
        // number of bytes mHead went past the end of the first mSize bytes
        size_t const overflow = pHead - pEnd;
        // A negative descriptor is what alloc() and dealloc() treat as "no ashmem";
        // 0 is a valid descriptor and must take the ashmem path as well.
        if (UTILS_LIKELY(mAshmemFd >= 0)) {
            assert_invariant(overflow <= mSize);
            // Wrap the head around, keeping the overflow: the new head is `overflow`
            // bytes after the beginning of the buffer.
            mHead = static_cast<void*>(pData + overflow);
            //  Data         Tail  End   Head              [virtual]
            //   v             v    v     v
            //   +-------------:----+-----:--------------+
            //   |             :    |     :              |
            //   +-----:------------+--------------------+
            //       Head           |<------ copy ------>| [physical]
        } else {
            //  Data         Tail  End   Head
            //   v             v    v     v
            //   +-------------:----+-----:--------------+
            //   |             :    |     :              |
            //   +-----|------------+-----|--------------+
            //         |<---------------->|
            //           sliding window
            // No ashmem: restart from the very beginning of the buffer.
            mHead = mData;
        }
    }
    // the next range starts where this one ended (after wrapping, if any)
    mTail = mHead;
    return range;
}
} // namespace filament::backend

View File

@@ -15,14 +15,25 @@
*/
#include "private/backend/CommandBufferQueue.h"
#include "private/backend/CircularBuffer.h"
#include "private/backend/CommandStream.h"
#include <utils/compiler.h>
#include <utils/Log.h>
#include <utils/Systrace.h>
#include <utils/Mutex.h>
#include <utils/ostream.h>
#include <utils/Panic.h>
#include <utils/Systrace.h>
#include <utils/debug.h>
#include "private/backend/BackendUtils.h"
#include "private/backend/CommandStream.h"
#include <algorithm>
#include <mutex>
#include <iterator>
#include <utility>
#include <vector>
#include <stddef.h>
#include <stdint.h>
using namespace utils;
@@ -65,50 +76,53 @@ void CommandBufferQueue::flush() noexcept {
// always guaranteed to have enough space for the NoopCommand
new(circularBuffer.allocate(sizeof(NoopCommand))) NoopCommand(nullptr);
// end of this slice
void* const head = circularBuffer.getHead();
const size_t requiredSize = mRequiredSize;
// beginning of this slice
void* const tail = circularBuffer.getTail();
// get the current buffer
auto const [begin, end] = circularBuffer.getBuffer();
// size of this slice
uint32_t const used = uint32_t(intptr_t(head) - intptr_t(tail));
assert_invariant(circularBuffer.empty());
circularBuffer.circularize();
// size of the current buffer
size_t const used = std::distance(
static_cast<char const*>(begin), static_cast<char const*>(end));
std::unique_lock<utils::Mutex> lock(mLock);
mCommandBuffersToExecute.push_back({ tail, head });
mCommandBuffersToExecute.push_back({ begin, end });
mCondition.notify_one();
// circular buffer is too small, we corrupted the stream
ASSERT_POSTCONDITION(used <= mFreeSpace,
"Backend CommandStream overflow. Commands are corrupted and unrecoverable.\n"
"Please increase minCommandBufferSizeMB inside the Config passed to Engine::create.\n"
"Space used at this time: %u bytes",
(unsigned)used);
"Space used at this time: %u bytes, overflow: %u bytes",
(unsigned)used, unsigned(used - mFreeSpace));
// wait until there is enough space in the buffer
mFreeSpace -= used;
const size_t requiredSize = mRequiredSize;
if (UTILS_UNLIKELY(mFreeSpace < requiredSize)) {
#ifndef NDEBUG
size_t totalUsed = circularBuffer.size() - mFreeSpace;
mHighWatermark = std::max(mHighWatermark, totalUsed);
if (UTILS_UNLIKELY(totalUsed > requiredSize)) {
slog.d << "CommandStream used too much space: " << totalUsed
<< ", out of " << requiredSize << " (will block)" << io::endl;
}
size_t const totalUsed = circularBuffer.size() - mFreeSpace;
slog.d << "CommandStream used too much space (will block): "
<< "needed space " << requiredSize << " out of " << mFreeSpace
<< ", totalUsed=" << totalUsed << ", current=" << used
<< ", queue size=" << mCommandBuffersToExecute.size() << " buffers"
<< io::endl;
mHighWatermark = std::max(mHighWatermark, totalUsed);
#endif
mCondition.notify_one();
if (UTILS_LIKELY(mFreeSpace < requiredSize)) {
SYSTRACE_NAME("waiting: CircularBuffer::flush()");
mCondition.wait(lock, [this, requiredSize]() -> bool {
// TODO: on macOS, we need to call pumpEvents from time to time
return mFreeSpace >= requiredSize;
});
}
}
std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() const {
std::vector<CommandBufferQueue::Range> CommandBufferQueue::waitForCommands() const {
if (!UTILS_HAS_THREADING) {
return std::move(mCommandBuffersToExecute);
}
@@ -123,7 +137,7 @@ std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() con
return std::move(mCommandBuffersToExecute);
}
void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Slice const& buffer) {
void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) {
std::lock_guard<utils::Mutex> const lock(mLock);
mFreeSpace += uintptr_t(buffer.end) - uintptr_t(buffer.begin);
mCondition.notify_one();

View File

@@ -54,7 +54,7 @@ using LinearAllocatorArena = utils::Arena<
#endif
using ArenaScope = utils::ArenaScope<LinearAllocatorArena>;
using RootArenaScope = utils::ArenaScope<LinearAllocatorArena>;
} // namespace filament

View File

@@ -168,7 +168,8 @@ void Froxelizer::setProjection(const mat4f& projection,
}
bool Froxelizer::prepare(
FEngine::DriverApi& driverApi, ArenaScope& arena, filament::Viewport const& viewport,
FEngine::DriverApi& driverApi, RootArenaScope& rootArenaScope,
filament::Viewport const& viewport,
const mat4f& projection, float projectionNear, float projectionFar) noexcept {
setViewport(viewport);
setProjection(projection, projectionNear, projectionFar);
@@ -199,12 +200,12 @@ bool Froxelizer::prepare(
// light records per froxel (~256 KiB)
mLightRecords = {
arena.allocate<LightRecord>(getFroxelBufferEntryCount(), CACHELINE_SIZE),
rootArenaScope.allocate<LightRecord>(getFroxelBufferEntryCount(), CACHELINE_SIZE),
getFroxelBufferEntryCount() };
// froxel thread data (~256 KiB)
mFroxelShardedData = {
arena.allocate<FroxelThreadData>(GROUP_COUNT, CACHELINE_SIZE),
rootArenaScope.allocate<FroxelThreadData>(GROUP_COUNT, CACHELINE_SIZE),
uint32_t(GROUP_COUNT)
};

View File

@@ -110,7 +110,7 @@ public:
*
* return true if updateUniforms() needs to be called
*/
bool prepare(backend::DriverApi& driverApi, ArenaScope& arena, Viewport const& viewport,
bool prepare(backend::DriverApi& driverApi, RootArenaScope& rootArenaScope, Viewport const& viewport,
const math::mat4f& projection, float projectionNear, float projectionFar) noexcept;
Froxel getFroxelAt(size_t x, size_t y, size_t z) const noexcept;

View File

@@ -414,7 +414,7 @@ void PostProcessManager::commitAndRender(FrameGraphResources::RenderPassInfo con
// ------------------------------------------------------------------------------------------------
PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph& fg,
RenderPass const& pass, uint8_t structureRenderFlags,
RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
uint32_t width, uint32_t height,
StructurePassConfig const& config) noexcept {
@@ -466,17 +466,19 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
.clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::DEPTH
});
},
[=, renderPass = pass](FrameGraphResources const& resources,
[=, passBuilder = passBuilder](FrameGraphResources const& resources,
auto const&, DriverApi&) mutable {
Variant structureVariant(Variant::DEPTH_VARIANT);
structureVariant.setPicking(config.picking);
auto out = resources.getRenderPassInfo();
renderPass.setRenderFlags(structureRenderFlags);
renderPass.setVariant(structureVariant);
renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SSAO);
renderPass.sortCommands(mEngine);
renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
passBuilder.renderFlags(structureRenderFlags);
passBuilder.variant(structureVariant);
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SSAO);
RenderPass const pass{ passBuilder.build(mEngine) };
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
});
auto depth = structurePass->depth;
@@ -523,7 +525,7 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
// ------------------------------------------------------------------------------------------------
FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
RenderPass const& pass,
RenderPassBuilder const& passBuilder,
FrameHistory const& frameHistory,
CameraInfo const& cameraInfo,
PerViewUniforms& uniforms,
@@ -586,7 +588,7 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
},
[this, projection = cameraInfo.projection,
userViewMatrix = cameraInfo.getUserViewMatrix(), uvFromClipMatrix, historyProjection,
options, &uniforms, renderPass = pass]
options, &uniforms, passBuilder = passBuilder]
(FrameGraphResources const& resources, auto const& data, DriverApi& driver) mutable {
// set structure sampler
uniforms.prepareStructure(data.structure ?
@@ -607,17 +609,17 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
auto out = resources.getRenderPassInfo();
// Remove the HAS_SHADOWING RenderFlags, since it's irrelevant when rendering reflections
RenderPass::RenderFlags flags = renderPass.getRenderFlags();
flags &= ~RenderPass::HAS_SHADOWING;
renderPass.setRenderFlags(flags);
passBuilder.renderFlags(~RenderPass::HAS_SHADOWING, 0);
// use our special SSR variant, it can only be applied to object that have
// the SCREEN_SPACE ReflectionMode.
renderPass.setVariant(Variant{Variant::SPECIAL_SSR});
passBuilder.variant(Variant{ Variant::SPECIAL_SSR });
// generate all our drawing commands, except blended objects.
renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
renderPass.sortCommands(mEngine);
renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
RenderPass const pass{ passBuilder.build(mEngine) };
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
});
return ssrPass->reflections;

View File

@@ -50,6 +50,7 @@ class FMaterialInstance;
class FrameGraph;
class PerViewUniforms;
class RenderPass;
class RenderPassBuilder;
struct CameraInfo;
class PostProcessManager {
@@ -99,12 +100,12 @@ public:
FrameGraphId<FrameGraphTexture> picking;
};
StructurePassOutput structure(FrameGraph& fg,
RenderPass const& pass, uint8_t structureRenderFlags,
RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
uint32_t width, uint32_t height, StructurePassConfig const& config) noexcept;
// reflections pass
FrameGraphId<FrameGraphTexture> ssr(FrameGraph& fg,
RenderPass const& pass,
RenderPassBuilder const& passBuilder,
FrameHistory const& frameHistory,
CameraInfo const& cameraInfo,
PerViewUniforms& uniforms,

View File

@@ -19,17 +19,43 @@
#include "RenderPrimitive.h"
#include "ShadowMap.h"
#include "details/Camera.h"
#include "details/Material.h"
#include "details/MaterialInstance.h"
#include "details/View.h"
#include "components/RenderableManager.h"
#include <private/filament/EngineEnums.h>
#include <private/filament/UibStructs.h>
#include <private/filament/Variant.h>
#include <filament/MaterialEnums.h>
#include <backend/DriverApiForward.h>
#include <backend/DriverEnums.h>
#include <backend/Handle.h>
#include <backend/PipelineState.h>
#include "private/backend/CircularBuffer.h"
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/JobSystem.h>
#include <utils/Panic.h>
#include <utils/Slice.h>
#include <utils/Systrace.h>
#include <utils/Range.h>
#include <algorithm>
#include <functional>
#include <limits>
#include <utility>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
using namespace utils;
using namespace filament::math;
@@ -37,60 +63,103 @@ namespace filament {
using namespace backend;
RenderPass::RenderPass(FEngine& engine,
RenderPass::Arena& arena) noexcept
: mCommandArena(arena),
mCustomCommands(engine.getPerRenderPassAllocator()) {
/*
 * Records a custom command (channel, pass, custom, order and the function to call) in
 * this builder and returns the builder so calls can be chained.
 *
 * The storage for custom commands is only constructed on the first call, using the
 * engine's per-render-pass arena.
 */
RenderPassBuilder& RenderPassBuilder::customCommand(
        FEngine& engine,
        uint8_t channel,
        RenderPass::Pass pass,
        RenderPass::CustomCommand custom,
        uint32_t order,
        RenderPass::Executor::CustomCommandFn const& command) {
    // lazily create the vector: a builder that never gets a custom command never builds it
    if (!mCustomCommands.has_value()) {
        mCustomCommands.emplace(engine.getPerRenderPassArena());
    }
    auto& customCommands = *mCustomCommands;
    customCommands.emplace_back(channel, pass, custom, order, command);
    return *this;
}
RenderPass::RenderPass(RenderPass const& rhs) = default;
// Creates a RenderPass from the current state of this builder.
// mRenderableSoa must be set first; the assertion message names RenderPassBuilder::geometry()
// as the call that provides it.
RenderPass RenderPassBuilder::build(FEngine& engine) {
ASSERT_POSTCONDITION(mRenderableSoa, "RenderPassBuilder::geometry() hasn't been called");
// the scissor viewport dimensions must fit in an int32_t
assert_invariant(mScissorViewport.width <= std::numeric_limits<int32_t>::max());
assert_invariant(mScissorViewport.height <= std::numeric_limits<int32_t>::max());
// the RenderPass constructor does the actual work (allocation, command generation, sorting)
return RenderPass{ engine, *this };
}
// ------------------------------------------------------------------------------------------------
/*
 * Builds a complete RenderPass from a RenderPassBuilder.
 *
 * All the work happens here: the builder's parameters are copied, the storage for
 * all commands (regular + custom) is allocated from the builder's arena in a single
 * allocation, the commands are generated, then sorted and, if automatic instancing is
 * enabled on the engine, instanced.
 */
RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept
        : mRenderableSoa(*builder.mRenderableSoa),
          mVisibleRenderables(builder.mVisibleRenderables),
          mUboHandle(builder.mUboHandle),
          mCameraPosition(builder.mCameraPosition),
          mCameraForwardVector(builder.mCameraForwardVector),
          mFlags(builder.mFlags),
          mVariant(builder.mVariant),
          mVisibilityMask(builder.mVisibilityMask),
          mScissorViewport(builder.mScissorViewport),
          mCustomCommands(engine.getPerRenderPassArena()) {

    // compute the number of commands we need
    updateSummedPrimitiveCounts(
            const_cast<FScene::RenderableSoa&>(mRenderableSoa), mVisibleRenderables);

    uint32_t commandCount =
            FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last);
    // the color pass counts double, for transparent objects that need to render twice
    const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR);
    const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH);
    commandCount *= uint32_t(colorPass * 2 + depthPass);
    commandCount += 1; // for the sentinel

    uint32_t const customCommandCount =
            builder.mCustomCommands.has_value() ? builder.mCustomCommands->size() : 0;

    // one allocation for everything: regular commands first, custom commands after them
    Command* const curr = builder.mArena.alloc<Command>(commandCount + customCommandCount);
    assert_invariant(curr);

    // the allocation succeeded but came from the heap: warn about it, once
    if (UTILS_UNLIKELY(builder.mArena.getAllocator().isHeapAllocation(curr))) {
        static bool sLogOnce = true;
        if (UTILS_UNLIKELY(sLogOnce)) {
            sLogOnce = false;
            PANIC_LOG("RenderPass arena is full, using slower system heap. Please increase "
                      "the appropriate constant (e.g. FILAMENT_PER_RENDER_PASS_ARENA_SIZE_IN_MB).");
        }
    }

    mCommandBegin = curr;
    mCommandEnd = curr + commandCount + customCommandCount;

    appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags);

    if (builder.mCustomCommands.has_value()) {
        // custom commands are written right after the regular ones
        Command* p = curr + commandCount;
        // bind by const reference: binding by value would copy each entry (including its
        // function object) on every iteration, before it's copied again into the parameter
        for (auto const& [channel, passId, command, order, fn]: builder.mCustomCommands.value()) {
            appendCustomCommand(p++, channel, passId, command, order, fn);
        }
    }

    // sort commands once we're done adding commands
    sortCommands(builder.mArena);

    if (engine.isAutomaticInstancingEnabled()) {
        instanceify(engine, builder.mArena);
    }
}
// this destructor is actually heavy because it inlines ~vector<>
RenderPass::~RenderPass() noexcept = default;
RenderPass::Command* RenderPass::append(size_t count) noexcept {
// this is like an "in-place" realloc(). Works only with LinearAllocator.
Command* const curr = mCommandArena.alloc<Command>(count);
assert_invariant(curr);
assert_invariant(mCommandBegin == nullptr || curr == mCommandEnd);
if (mCommandBegin == nullptr) {
mCommandBegin = mCommandEnd = curr;
}
mCommandEnd += count;
return curr;
}
void RenderPass::resize(size_t count) noexcept {
void RenderPass::resize(Arena& arena, size_t count) noexcept {
if (mCommandBegin) {
mCommandEnd = mCommandBegin + count;
mCommandArena.rewind(mCommandEnd);
arena.rewind(mCommandEnd);
}
}
void RenderPass::setGeometry(FScene::RenderableSoa const& soa, Range<uint32_t> vr,
backend::Handle<backend::HwBufferObject> uboHandle) noexcept {
mRenderableSoa = &soa;
mVisibleRenderables = vr;
mUboHandle = uboHandle;
}
void RenderPass::setCamera(const CameraInfo& camera) noexcept {
mCameraPosition = camera.getPosition();
mCameraForwardVector = camera.getForwardVector();
}
void RenderPass::setScissorViewport(backend::Viewport viewport) noexcept {
assert_invariant(viewport.width <= std::numeric_limits<int32_t>::max());
assert_invariant(viewport.height <= std::numeric_limits<int32_t>::max());
mScissorViewport = viewport;
}
void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandTypeFlags) noexcept {
void RenderPass::appendCommands(FEngine& engine,
Slice<Command> commands, CommandTypeFlags const commandTypeFlags) noexcept {
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
assert_invariant(mRenderableSoa);
utils::Range<uint32_t> const vr = mVisibleRenderables;
// trace the number of visible renderables
SYSTRACE_VALUE32("visibleRenderables", vr.size());
@@ -104,17 +173,10 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
const FScene::VisibleMaskType visibilityMask = mVisibilityMask;
// up-to-date summed primitive counts needed for generateCommands()
FScene::RenderableSoa const& soa = *mRenderableSoa;
updateSummedPrimitiveCounts(const_cast<FScene::RenderableSoa&>(soa), vr);
FScene::RenderableSoa const& soa = mRenderableSoa;
// compute how much maximum storage we need for this pass
uint32_t commandCount = FScene::getPrimitiveCount(soa, vr.last);
// double the color pass for transparent objects that need to render twice
const bool colorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR);
const bool depthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH);
commandCount *= uint32_t(colorPass * 2 + depthPass);
commandCount += 1; // for the sentinel
Command* const curr = append(commandCount);
Command* curr = commands.data();
size_t const commandCount = commands.size();
auto stereoscopicEyeCount =
renderFlags & IS_STEREOSCOPIC ? engine.getConfig().stereoscopicEyeCount : 1;
@@ -152,7 +214,8 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
}
}
void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
void RenderPass::appendCustomCommand(Command* commands,
uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
Executor::CustomCommandFn command) {
assert_invariant((uint64_t(order) << CUSTOM_ORDER_SHIFT) <= CUSTOM_ORDER_MASK);
@@ -168,11 +231,10 @@ void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand c
cmd |= uint64_t(order) << CUSTOM_ORDER_SHIFT;
cmd |= uint64_t(index);
Command* const curr = append(1);
curr->key = cmd;
commands->key = cmd;
}
void RenderPass::sortCommands(FEngine& engine) noexcept {
void RenderPass::sortCommands(Arena& arena) noexcept {
SYSTRACE_NAME("sort and trim commands");
std::sort(mCommandBegin, mCommandEnd);
@@ -183,30 +245,20 @@ void RenderPass::sortCommands(FEngine& engine) noexcept {
return c.key != uint64_t(Pass::SENTINEL);
});
resize(uint32_t(last - mCommandBegin));
if (engine.isAutomaticInstancingEnabled()) {
instanceify(engine);
}
resize(arena, uint32_t(last - mCommandBegin));
}
void RenderPass::execute(FEngine& engine, const char* name,
void RenderPass::execute(RenderPass const& pass,
FEngine& engine, const char* name,
backend::Handle<backend::HwRenderTarget> renderTarget,
backend::RenderPassParams params) const noexcept {
backend::RenderPassParams params) noexcept {
DriverApi& driver = engine.getDriverApi();
// this is a good time to flush the CommandStream, because we're about to potentially
// output a lot of commands. This guarantees here that we have at least
// FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default).
engine.flush();
driver.beginRenderPass(renderTarget, params);
getExecutor().execute(engine, name);
pass.getExecutor().execute(engine, name);
driver.endRenderPass();
}
void RenderPass::instanceify(FEngine& engine) noexcept {
void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept {
SYSTRACE_NAME("instanceify");
// instanceify works by scanning the **sorted** command stream, looking for repeat draw
@@ -262,7 +314,8 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
// buffer large enough for all instances data
stagingBufferSize = sizeof(PerRenderableData) * (last - curr);
stagingBuffer = (PerRenderableData*)::malloc(stagingBufferSize);
uboData = mRenderableSoa->data<FScene::UBO>();
uboData = mRenderableSoa.data<FScene::UBO>();
assert_invariant(uboData);
}
// copy the ubo data to a staging buffer
@@ -315,7 +368,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
return command.key == uint64_t(Pass::SENTINEL);
});
resize(uint32_t(lastCommand - mCommandBegin));
resize(arena, uint32_t(lastCommand - mCommandBegin));
}
assert_invariant(stagingBuffer == nullptr);
@@ -323,7 +376,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
/* static */
UTILS_ALWAYS_INLINE // this function exists only to make the code more readable. we want it inlined.
UTILS_ALWAYS_INLINE // This function exists only to make the code more readable. we want it inlined.
inline // and we don't need it in the compilation unit
void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
FMaterialInstance const* const UTILS_RESTRICT mi, bool inverseFrontFaces) noexcept {
@@ -374,7 +427,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
/* static */
UTILS_NOINLINE
void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const commands,
void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands,
FScene::RenderableSoa const& soa, Range<uint32_t> range,
Variant variant, RenderFlags renderFlags,
FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward,
@@ -432,9 +485,9 @@ void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const comm
}
/* static */
template<uint32_t commandTypeFlags>
template<RenderPass::CommandTypeFlags commandTypeFlags>
UTILS_NOINLINE
RenderPass::Command* RenderPass::generateCommandsImpl(uint32_t extraFlags,
RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags,
Command* UTILS_RESTRICT curr,
FScene::RenderableSoa const& UTILS_RESTRICT soa, Range<uint32_t> range,
Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask,
@@ -737,13 +790,13 @@ void RenderPass::updateSummedPrimitiveCounts(
// ------------------------------------------------------------------------------------------------
void RenderPass::Executor::overridePolygonOffset(backend::PolygonOffset const* polygonOffset) noexcept {
if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) {
if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) { // NOLINT(*-assignment-in-if-condition)
mPolygonOffset = *polygonOffset;
}
}
void RenderPass::Executor::overrideScissor(backend::Viewport const* scissor) noexcept {
if ((mScissorOverride = (scissor != nullptr))) {
if ((mScissorOverride = (scissor != nullptr))) { // NOLINT(*-assignment-in-if-condition)
mScissor = *scissor;
}
}
@@ -754,15 +807,20 @@ void RenderPass::Executor::overrideScissor(backend::Viewport const& scissor) noe
}
void RenderPass::Executor::execute(FEngine& engine, const char*) const noexcept {
execute(engine.getDriverApi(), mCommands.begin(), mCommands.end());
execute(engine, mCommands.begin(), mCommands.end());
}
UTILS_NOINLINE // no need to be inlined
void RenderPass::Executor::execute(backend::DriverApi& driver,
void RenderPass::Executor::execute(FEngine& engine,
const Command* first, const Command* last) const noexcept {
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
DriverApi& driver = engine.getDriverApi();
size_t const capacity = engine.getMinCommandBufferSize();
CircularBuffer const& circularBuffer = driver.getCircularBuffer();
if (first != last) {
SYSTRACE_VALUE32("commandCount", last - first);
@@ -781,126 +839,163 @@ void RenderPass::Executor::execute(backend::DriverApi& driver,
FMaterial const* UTILS_RESTRICT ma = nullptr;
auto const* UTILS_RESTRICT pCustomCommands = mCustomCommands.data();
first--;
while (++first != last) {
assert_invariant(first->key != uint64_t(Pass::SENTINEL));
// Maximum space occupied in the CircularBuffer by a single `Command`. This must be
// reevaluated when the inner loop below adds DriverApi commands or when we change the
// CommandStream protocol. Currently, the maximum is 240 bytes, and we use 256 to be on
// the safer side.
size_t const maxCommandSizeInBytes = 256;
/*
* Be careful when changing code below, this is the hot inner-loop
*/
// Number of Commands that can be issued and guaranteed to fit in the current
// CircularBuffer allocation. In practice, we'll have tons of headroom especially if
// skinning and morphing aren't used. With a 2 MiB buffer (the default) a batch is
// 8192 commands (i.e. draw calls).
size_t const batchCommandCount = capacity / maxCommandSizeInBytes;
while(first != last) {
Command const* const batchLast = std::min(first + batchCommandCount, last);
if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
mi = nullptr; // custom command could change the currently bound MaterialInstance
uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
assert_invariant(index < mCustomCommands.size());
pCustomCommands[index]();
continue;
// actual number of commands we need to write (can be smaller than batchCommandCount)
size_t const commandCount = batchLast - first;
size_t const commandSizeInBytes = commandCount * maxCommandSizeInBytes;
// check we have enough capacity to write these commandCount commands, if not,
// request a new CircularBuffer allocation of `capacity` bytes.
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity - commandSizeInBytes)) {
engine.flush(); // TODO: we should use a "fast" flush if possible
}
// primitiveHandle may be invalid if no geometry was set on the renderable.
if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
continue;
}
first--;
while (++first != batchLast) {
assert_invariant(first->key != uint64_t(Pass::SENTINEL));
// per-renderable uniform
const PrimitiveInfo info = first->primitive;
pipeline.rasterState = info.rasterState;
/*
* Be careful when changing code below, this is the hot inner-loop
*/
if (UTILS_UNLIKELY(mi != info.mi)) {
// this is always taken the first time
mi = info.mi;
ma = mi->getMaterial();
auto const& scissor = mi->getScissor();
if (UTILS_UNLIKELY(mi->hasScissor())) {
// scissor is set, we need to apply the offset/clip
// clang vectorizes this!
constexpr int32_t maxvali = std::numeric_limits<int32_t>::max();
const backend::Viewport scissorViewport = mScissorViewport;
// compute new left/bottom, assume no overflow
int32_t l = scissor.left + scissorViewport.left;
int32_t b = scissor.bottom + scissorViewport.bottom;
// compute right/top without overflowing, scissor.width/height guaranteed
// to convert to int32
int32_t r = (l > maxvali - int32_t(scissor.width)) ?
maxvali : l + int32_t(scissor.width);
int32_t t = (b > maxvali - int32_t(scissor.height)) ?
maxvali : b + int32_t(scissor.height);
// clip to the viewport
l = std::max(l, scissorViewport.left);
b = std::max(b, scissorViewport.bottom);
r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
assert_invariant(r >= l && t >= b);
*pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
} else {
// no scissor set (common case), 'scissor' has its default value, use that.
*pScissor = scissor;
if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
mi = nullptr; // custom command could change the currently bound MaterialInstance
uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
assert_invariant(index < mCustomCommands.size());
pCustomCommands[index]();
continue;
}
*pPipelinePolygonOffset = mi->getPolygonOffset();
pipeline.stencilState = mi->getStencilState();
mi->use(driver);
}
pipeline.program = ma->getProgram(info.materialVariant);
uint16_t const instanceCount = info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
auto getPerObjectUboHandle =
[this, &info, &instanceCount]() -> std::pair<Handle<backend::HwBufferObject>, uint32_t> {
if (info.instanceBufferHandle) {
// "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
return { info.instanceBufferHandle, 0 };
// primitiveHandle may be invalid if no geometry was set on the renderable.
if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
continue;
}
bool const userInstancing =
(info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
if (!userInstancing && instanceCount > 1) {
// automatic instancing
return { mInstancedUboHandle, info.index * sizeof(PerRenderableData) };
} else {
// manual instancing
return { mUboHandle, info.index * sizeof(PerRenderableData) };
// per-renderable uniform
const PrimitiveInfo info = first->primitive;
pipeline.rasterState = info.rasterState;
if (UTILS_UNLIKELY(mi != info.mi)) {
// this is always taken the first time
mi = info.mi;
assert_invariant(mi);
ma = mi->getMaterial();
auto const& scissor = mi->getScissor();
if (UTILS_UNLIKELY(mi->hasScissor())) {
// scissor is set, we need to apply the offset/clip
// clang vectorizes this!
constexpr int32_t maxvali = std::numeric_limits<int32_t>::max();
const backend::Viewport scissorViewport = mScissorViewport;
// compute new left/bottom, assume no overflow
int32_t l = scissor.left + scissorViewport.left;
int32_t b = scissor.bottom + scissorViewport.bottom;
// compute right/top without overflowing, scissor.width/height guaranteed
// to convert to int32
int32_t r = (l > maxvali - int32_t(scissor.width)) ?
maxvali : l + int32_t(scissor.width);
int32_t t = (b > maxvali - int32_t(scissor.height)) ?
maxvali : b + int32_t(scissor.height);
// clip to the viewport
l = std::max(l, scissorViewport.left);
b = std::max(b, scissorViewport.bottom);
r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
assert_invariant(r >= l && t >= b);
*pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
} else {
// no scissor set (common case), 'scissor' has its default value, use that.
*pScissor = scissor;
}
*pPipelinePolygonOffset = mi->getPolygonOffset();
pipeline.stencilState = mi->getStencilState();
mi->use(driver);
}
};
// bind per-renderable uniform block. there is no need to attempt to skip this command
// because the backends already do this.
auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
assert_invariant(perObjectUboHandle);
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+UniformBindingPoints::PER_RENDERABLE,
perObjectUboHandle,
offset,
sizeof(PerRenderableUib));
assert_invariant(ma);
pipeline.program = ma->getProgram(info.materialVariant);
if (UTILS_UNLIKELY(info.skinningHandle)) {
// note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
uint16_t const instanceCount =
info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
auto getPerObjectUboHandle =
[this, &info, &instanceCount]() -> std::pair<Handle<backend::HwBufferObject>, uint32_t> {
if (info.instanceBufferHandle) {
// "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
return { info.instanceBufferHandle, 0 };
}
bool const userInstancing =
(info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
if (!userInstancing && instanceCount > 1) {
// automatic instancing
return {
mInstancedUboHandle,
info.index * sizeof(PerRenderableData) };
} else {
// manual instancing
return { mUboHandle, info.index * sizeof(PerRenderableData) };
}
};
// Bind per-renderable uniform block. There is no need to attempt to skip this command
// because the backends already do this.
auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
assert_invariant(perObjectUboHandle);
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+UniformBindingPoints::PER_RENDERABLE_BONES,
info.skinningHandle,
info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
sizeof(PerRenderableBoneUib));
// note: always bind the skinningTexture because the shader needs it.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
// note: even if only skinning is enabled, binding morphTargetBuffer is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
}
+UniformBindingPoints::PER_RENDERABLE,
perObjectUboHandle,
offset,
sizeof(PerRenderableUib));
if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
// Instead of using a UBO per primitive, we could also have a single UBO for all
// primitives and use bindUniformBufferRange which might be more efficient.
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
info.morphWeightBuffer);
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
// note: even if only morphing is enabled, binding skinningTexture is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
if (UTILS_UNLIKELY(info.skinningHandle)) {
// note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+UniformBindingPoints::PER_RENDERABLE_BONES,
info.skinningHandle,
info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
sizeof(PerRenderableBoneUib));
// note: always bind the skinningTexture because the shader needs it.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
// note: even if only skinning is enabled, binding morphTargetBuffer is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
}
if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
// Instead of using a UBO per primitive, we could also have a single UBO for all
// primitives and use bindUniformBufferRange which might be more efficient.
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
info.morphWeightBuffer);
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
info.morphTargetBuffer);
// note: even if only morphing is enabled, binding skinningTexture is needed.
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
info.skinningTexture);
}
driver.draw(pipeline, info.primitiveHandle, instanceCount);
}
}
driver.draw(pipeline, info.primitiveHandle, instanceCount);
// If the remaining space is less than half the capacity, we flush right away to
// allow some headroom for commands that might come later.
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity / 2)) {
engine.flush();
}
}

View File

@@ -22,26 +22,38 @@
#include "details/Camera.h"
#include "details/Scene.h"
#include "backend/DriverApiForward.h"
#include <private/filament/Variant.h>
#include "private/filament/Variant.h"
#include "utils/BitmaskEnum.h"
#include <backend/DriverEnums.h>
#include <backend/Handle.h>
#include <utils/Allocator.h>
#include <utils/Range.h>
#include <utils/Slice.h>
#include <utils/architecture.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <math/mathfwd.h>
#include <functional>
#include <limits>
#include <optional>
#include <type_traits>
#include <tuple>
#include <vector>
#include <stddef.h>
#include <stdint.h>
namespace filament {
namespace backend {
class CommandBufferQueue;
}
class FMaterialInstance;
class RenderPassBuilder;
class RenderPass {
public:
@@ -171,7 +183,7 @@ public:
EPILOG = uint64_t(0x2) << CUSTOM_SHIFT
};
enum CommandTypeFlags : uint8_t {
enum class CommandTypeFlags : uint32_t {
COLOR = 0x1, // generate the color pass only
DEPTH = 0x2, // generate the depth pass only ( e.g. shadowmap)
@@ -191,7 +203,6 @@ public:
SCREEN_SPACE_REFLECTIONS = COLOR | FILTER_TRANSLUCENT_OBJECTS
};
/*
* The sorting material key is 32 bits and encoded as:
*
@@ -240,7 +251,6 @@ public:
uint32_t skinningOffset = 0; // 4 bytes
uint16_t instanceCount; // 2 bytes [MSb: user]
Variant materialVariant; // 1 byte
// uint8_t reserved[0] = {}; // 0 bytes
static const uint16_t USER_INSTANCE_MASK = 0x8000u;
static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu;
@@ -253,7 +263,7 @@ public:
uint64_t reserved[1] = {}; // 8 bytes
bool operator < (Command const& rhs) const noexcept { return key < rhs.key; }
// placement new declared as "throw" to avoid the compiler's null-check
inline void* operator new (std::size_t, void* ptr) {
inline void* operator new (size_t, void* ptr) {
assert_invariant(ptr);
return ptr;
}
@@ -269,61 +279,31 @@ public:
// Arena used for commands
using Arena = utils::Arena<
utils::LinearAllocator, // note: can't change this allocator
utils::LinearAllocatorWithFallback,
utils::LockingPolicy::NoLock,
utils::TrackingPolicy::HighWatermark,
utils::AreaPolicy::StaticArea>;
/*
* Create a RenderPass.
* The Arena is used to allocate commands which are then owned by the Arena.
*/
RenderPass(FEngine& engine, Arena& arena) noexcept;
// RenderPass can only be moved
RenderPass(RenderPass&& rhs) = default;
// Copy the RenderPass as is. This can be used to create a RenderPass from a "template"
// by copying from an "empty" RenderPass.
RenderPass(RenderPass const& rhs);
// RenderPass can't be copied
RenderPass(RenderPass const& rhs) = delete;
RenderPass& operator=(RenderPass const& rhs) = delete;
RenderPass& operator=(RenderPass&& rhs) = delete;
// allocated commands ARE NOT freed, they're owned by the Arena
~RenderPass() noexcept;
// a box that both offsets the viewport and clips it
void setScissorViewport(backend::Viewport viewport) noexcept;
// specifies the geometry to generate commands for
void setGeometry(FScene::RenderableSoa const& soa, utils::Range<uint32_t> vr,
backend::Handle<backend::HwBufferObject> uboHandle) noexcept;
// specifies camera information (e.g. used for sorting commands)
void setCamera(const CameraInfo& camera) noexcept;
// flags controlling how commands are generated
void setRenderFlags(RenderFlags flags) noexcept { mFlags = flags; }
RenderFlags getRenderFlags() const noexcept { return mFlags; }
// variant to use
void setVariant(Variant variant) noexcept { mVariant = variant; }
// Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to determine
// if the renderable is visible for this pass.
// Defaults to all 1's, which means all renderables in this render pass will be rendered.
void setVisibilityMask(FScene::VisibleMaskType mask) noexcept { mVisibilityMask = mask; }
Command const* begin() const noexcept { return mCommandBegin; }
Command const* end() const noexcept { return mCommandEnd; }
bool empty() const noexcept { return begin() == end(); }
// This is the main function of this class, this appends commands to the pass using
// the current camera, geometry and flags set. This can be called multiple times if needed.
void appendCommands(FEngine& engine, CommandTypeFlags commandTypeFlags) noexcept;
// sorts and instanceify commands then trims sentinels
void sortCommands(FEngine& engine) noexcept;
// Helper to execute all the commands generated by this RenderPass
void execute(FEngine& engine, const char* name,
static void execute(RenderPass const& pass,
FEngine& engine, const char* name,
backend::Handle<backend::HwRenderTarget> renderTarget,
backend::RenderPassParams params) const noexcept;
backend::RenderPassParams params) noexcept;
/*
* Executor holds the range of commands to execute for a given pass
@@ -331,6 +311,7 @@ public:
class Executor {
using CustomCommandFn = std::function<void()>;
friend class RenderPass;
friend class RenderPassBuilder;
// these fields are constant after creation
utils::Slice<Command> mCommands;
@@ -346,8 +327,7 @@ public:
Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept;
void execute(backend::DriverApi& driver,
const Command* first, const Command* last) const noexcept;
void execute(FEngine& engine, const Command* first, const Command* last) const noexcept;
public:
Executor() = default;
@@ -366,37 +346,39 @@ public:
};
// returns a new executor for this pass
Executor getExecutor() {
return { this, mCommandBegin, mCommandEnd };
}
Executor getExecutor() const {
return { this, mCommandBegin, mCommandEnd };
}
// returns a new executor for this pass with a custom range
Executor getExecutor(Command const* b, Command const* e) {
return { this, b, e };
}
Executor getExecutor(Command const* b, Command const* e) const {
return { this, b, e };
}
// Appends a custom command.
void appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
Executor::CustomCommandFn command);
private:
friend class FRenderer;
friend class RenderPassBuilder;
RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept;
Command* append(size_t count) noexcept;
void resize(size_t count) noexcept;
void instanceify(FEngine& engine) noexcept;
// This is the main function of this class, this appends commands to the pass using
// the current camera, geometry and flags set. This can be called multiple times if needed.
void appendCommands(FEngine& engine,
utils::Slice<Command> commands, CommandTypeFlags commandTypeFlags) noexcept;
// we choose the command count per job to minimize JobSystem overhead.
// on a Pixel 4, 2048 commands is about half a millisecond of processing.
// Appends a custom command.
void appendCustomCommand(Command* commands,
uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
Executor::CustomCommandFn command);
void resize(Arena& arena, size_t count) noexcept;
// sorts commands then trims sentinels
void sortCommands(Arena& arena) noexcept;
// instanceify commands then trims sentinels
void instanceify(FEngine& engine, Arena& arena) noexcept;
// We choose the command count per job to minimize JobSystem overhead.
// On a Pixel 4, 2048 commands is about half a millisecond of processing.
static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 2048;
static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_SIZE =
sizeof(Command) * JOBS_PARALLEL_FOR_COMMANDS_COUNT;
@@ -404,15 +386,15 @@ private:
static_assert(JOBS_PARALLEL_FOR_COMMANDS_SIZE % utils::CACHELINE_SIZE == 0,
"Size of Commands jobs must be multiple of a cache-line size");
static inline void generateCommands(uint32_t commandTypeFlags, Command* commands,
static inline void generateCommands(CommandTypeFlags commandTypeFlags, Command* commands,
FScene::RenderableSoa const& soa, utils::Range<uint32_t> range,
Variant variant, RenderFlags renderFlags,
FScene::VisibleMaskType visibilityMask,
math::float3 cameraPosition, math::float3 cameraForward,
uint8_t instancedStereoEyeCount) noexcept;
template<uint32_t commandTypeFlags>
static inline Command* generateCommandsImpl(uint32_t extraFlags, Command* curr,
template<RenderPass::CommandTypeFlags commandTypeFlags>
static inline Command* generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* curr,
FScene::RenderableSoa const& soa, utils::Range<uint32_t> range,
Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask,
math::float3 cameraPosition, math::float3 cameraForward,
@@ -424,50 +406,129 @@ private:
static void updateSummedPrimitiveCounts(
FScene::RenderableSoa& renderableData, utils::Range<uint32_t> vr) noexcept;
// a reference to the Engine, mostly to get to things like JobSystem
// Arena where all Commands are allocated. The Arena owns the commands.
Arena& mCommandArena;
FScene::RenderableSoa const& mRenderableSoa;
utils::Range<uint32_t> const mVisibleRenderables;
backend::Handle<backend::HwBufferObject> const mUboHandle;
math::float3 const mCameraPosition;
math::float3 const mCameraForwardVector;
RenderFlags const mFlags;
Variant const mVariant;
FScene::VisibleMaskType const mVisibilityMask;
backend::Viewport const mScissorViewport;
// Pointer to the first command
Command* mCommandBegin = nullptr;
// Pointer to one past the last command
Command* mCommandEnd = nullptr;
// the SOA containing the renderables we're interested in
FScene::RenderableSoa const* mRenderableSoa = nullptr;
// The range of visible renderables in the SOA above
utils::Range<uint32_t> mVisibleRenderables{};
// the UBO containing the data for the renderables
backend::Handle<backend::HwBufferObject> mUboHandle;
// a UBO for instanced primitives
backend::Handle<backend::HwBufferObject> mInstancedUboHandle;
// info about the camera
math::float3 mCameraPosition{};
math::float3 mCameraForwardVector{};
// info about the scene features (e.g.: has shadows, lighting, etc...)
RenderFlags mFlags{};
// Variant to use
Variant mVariant{};
// Additional visibility mask
FScene::VisibleMaskType mVisibilityMask = std::numeric_limits<FScene::VisibleMaskType>::max();
backend::Viewport mScissorViewport{ 0, 0,
std::numeric_limits<int32_t>::max(),
std::numeric_limits<int32_t>::max() };
// a vector for our custom commands
using CustomCommandVector = std::vector<Executor::CustomCommandFn,
utils::STLAllocator<Executor::CustomCommandFn, LinearAllocatorArena>>;
mutable CustomCommandVector mCustomCommands;
};
/*
 * RenderPassBuilder collects the parameters of a RenderPass through chainable setters
 * (each setter returns *this) and produces the RenderPass with build().
 *
 * The builder only stores values, a reference to the Arena and a pointer to the
 * RenderableSoa; it is cheap to copy and pass around as long as no custom command
 * was recorded (mCustomCommands is only constructed on demand).
 */
class RenderPassBuilder {
// RenderPass reads the private fields below (it is constructed from a builder).
friend class RenderPass;
// Arena given at construction. Held by reference: it must outlive this builder.
RenderPass::Arena& mArena;
// Which kind of commands to generate; value-initialized until commandTypeFlags() is called.
RenderPass::CommandTypeFlags mCommandTypeFlags{};
// Defaults to an origin of (0, 0) and INT32_MAX for the two extents.
backend::Viewport mScissorViewport{ 0, 0, INT32_MAX, INT32_MAX };
// Set by geometry(); stays nullptr if geometry() is never called.
FScene::RenderableSoa const* mRenderableSoa = nullptr;
utils::Range<uint32_t> mVisibleRenderables{};
backend::Handle<backend::HwBufferObject> mUboHandle;
// Set by camera()
math::float3 mCameraPosition{};
math::float3 mCameraForwardVector{};
RenderPass::RenderFlags mFlags{};
Variant mVariant{};
// Largest value of VisibleMaskType by default (see visibilityMask() below).
FScene::VisibleMaskType mVisibilityMask = std::numeric_limits<FScene::VisibleMaskType>::max();
// One recorded custom command; the fields are in the same order as the parameters of
// customCommand(): channel, pass, custom, order, command.
using CustomCommandRecord = std::tuple<
uint8_t,
RenderPass::Pass,
RenderPass::CustomCommand,
uint32_t,
RenderPass::Executor::CustomCommandFn>;
// Storage for the recorded custom commands, backed by a LinearAllocatorArena.
using CustomCommandContainer = std::vector<CustomCommandRecord,
utils::STLAllocator<CustomCommandRecord, LinearAllocatorArena>>;
// we make this optional because it's not used often, and we don't want to have
// to construct it by default.
std::optional<CustomCommandContainer> mCustomCommands;
public:
// Creates a builder bound to `arena`. Only the reference is kept.
explicit RenderPassBuilder(RenderPass::Arena& arena) : mArena(arena) { }
// Sets the type of commands to generate (e.g. CommandTypeFlags::SHADOW).
RenderPassBuilder& commandTypeFlags(RenderPass::CommandTypeFlags commandTypeFlags) noexcept {
mCommandTypeFlags = commandTypeFlags;
return *this;
}
// Sets the scissor viewport. The value is copied.
RenderPassBuilder& scissorViewport(backend::Viewport viewport) noexcept {
mScissorViewport = viewport;
return *this;
}
// specifies the geometry to generate commands for
// Note: only the address of `soa` is stored, so `soa` must stay alive for as long as
// this builder (or any copy of it) may still be used.
RenderPassBuilder& geometry(FScene::RenderableSoa const& soa, utils::Range<uint32_t> vr,
backend::Handle<backend::HwBufferObject> uboHandle) noexcept {
mRenderableSoa = &soa;
mVisibleRenderables = vr;
mUboHandle = uboHandle;
return *this;
}
// Specifies camera information (e.g. used for sorting commands)
// The position and forward vector are copied out of `camera`; no reference is kept.
RenderPassBuilder& camera(const CameraInfo& camera) noexcept {
mCameraPosition = camera.getPosition();
mCameraForwardVector = camera.getForwardVector();
return *this;
}
// flags controlling how commands are generated
// Replaces all the flags previously set.
RenderPassBuilder& renderFlags(RenderPass::RenderFlags flags) noexcept {
mFlags = flags;
return *this;
}
// like above but allows to set specific flags
// As written, the result is (mFlags | value) & mask: every flag outside of `mask` is
// cleared, and a flag inside `mask` ends up set if it was set in either mFlags or `value`.
// NOTE(review): a conventional masked update would be (mFlags & ~mask) | (value & mask),
// which leaves the flags outside of `mask` untouched -- confirm which one is intended
// against the callers before changing it.
RenderPassBuilder& renderFlags(
RenderPass::RenderFlags mask, RenderPass::RenderFlags value) noexcept {
mFlags = (mFlags & mask) | (value & mask);
return *this;
}
// variant to use
RenderPassBuilder& variant(Variant variant) noexcept {
mVariant = variant;
return *this;
}
// Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to
// determine if the renderable is visible for this pass.
// Defaults to all 1's, which means all renderables in this render pass will be rendered.
RenderPassBuilder& visibilityMask(FScene::VisibleMaskType mask) noexcept {
mVisibilityMask = mask;
return *this;
}
// Adds a custom command (defined out-of-line). The parameters match the fields of
// CustomCommandRecord; presumably the command is recorded in mCustomCommands -- see the
// definition to confirm. Unlike the setters above, this one is not declared noexcept.
RenderPassBuilder& customCommand(FEngine& engine,
uint8_t channel,
RenderPass::Pass pass,
RenderPass::CustomCommand custom,
uint32_t order,
const RenderPass::Executor::CustomCommandFn& command);
// Creates the RenderPass from the parameters set so far (defined out-of-line).
// This is not a const method, so it cannot be called on a const builder.
RenderPass build(FEngine& engine);
};
} // namespace filament
// RenderPass::CommandTypeFlags is a scoped enum (enum class), so it has no built-in bitwise
// operators. This specialization marks it as a bitmask type (it derives from std::true_type);
// presumably this is what enables the operators declared in utils/BitmaskEnum.h for it.
// It is a specialization of a template in namespace utils, hence it lives outside of
// namespace filament.
template<> struct utils::EnableBitMaskOperators<filament::RenderPass::CommandTypeFlags>
: public std::true_type {};
#endif // TNT_FILAMENT_RENDERPASS_H

View File

@@ -228,10 +228,6 @@ FrameGraphId<FrameGraphTexture> RendererUtils::colorPass(
out.params.subpassMask = 1;
}
// this is a good time to flush the CommandStream, because we're about to potentially
// output a lot of commands. This guarantees here that we have at least
// FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default).
engine.flush();
driver.beginRenderPass(out.target, out.params);
passExecutor.execute(engine, resources.getPassName());
driver.endRenderPass();

View File

@@ -19,14 +19,21 @@
#include "RenderPass.h"
#include "ShadowMap.h"
#include "details/DebugRegistry.h"
#include "details/Texture.h"
#include "details/View.h"
#include <fg/FrameGraph.h>
#include <backend/DriverEnums.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/FixedCapacityVector.h>
#include <new>
#include <memory>
namespace filament {
using namespace backend;
@@ -128,7 +135,8 @@ void ShadowMapManager::addShadowMap(size_t lightIndex, bool spotlight,
}
FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameGraph& fg,
RenderPass const& pass, FView& view, CameraInfo const& mainCameraInfo,
RenderPassBuilder const& passBuilder,
FView& view, CameraInfo const& mainCameraInfo,
float4 const& userTime) noexcept {
const float moment2 = std::numeric_limits<half>::max();
@@ -206,8 +214,8 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
builder.sideEffect();
},
[this, &engine, &view, vsmShadowOptions,
scene, mainCameraInfo, userTime, passTemplate = pass](
FrameGraphResources const&, auto const& data, DriverApi& driver) {
scene, mainCameraInfo, userTime, passBuilder = passBuilder](
FrameGraphResources const&, auto const& data, DriverApi& driver) mutable {
// Note: we could almost parallel_for the loop below, the problem currently is
// that updatePrimitivesLod() updates temporary global state.
@@ -262,19 +270,20 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
cameraInfo, scene->getRenderableData(), entry.range);
// generate and sort the commands for rendering the shadow map
RenderPass pass(passTemplate);
pass.setCamera(cameraInfo);
pass.setVisibilityMask(entry.visibilityMask);
pass.setGeometry(scene->getRenderableData(),
entry.range, scene->getRenderableUBO());
pass.appendCommands(engine, RenderPass::SHADOW);
pass.sortCommands(engine);
RenderPass const pass = passBuilder
.camera(cameraInfo)
.visibilityMask(entry.visibilityMask)
.geometry(scene->getRenderableData(),
entry.range, scene->getRenderableUBO())
.commandTypeFlags(RenderPass::CommandTypeFlags::SHADOW)
.build(engine);
entry.executor = pass.getExecutor();
if (!view.hasVSM()) {
auto const* options = shadowMap.getShadowOptions();
const PolygonOffset polygonOffset = { // handle reversed Z
PolygonOffset const polygonOffset = { // handle reversed Z
.slope = -options->polygonOffsetSlope,
.constant = -options->polygonOffsetConstant
};
@@ -395,7 +404,6 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
auto rt = resources.getRenderPassInfo(data.rt);
engine.flush();
driver.beginRenderPass(rt.target, rt.params);
entry.shadowMap->bind(driver);
entry.executor.overrideScissor(entry.shadowMap->getScissor());

View File

@@ -43,6 +43,7 @@ namespace filament {
class FView;
class FrameGraph;
class RenderPass;
class RenderPassBuilder;
struct ShadowMappingUniforms {
math::float4 cascadeSplits;
@@ -86,7 +87,8 @@ public:
FScene::RenderableSoa& renderableData, FScene::LightSoa const& lightData) noexcept;
// Renders all the shadow maps.
FrameGraphId<FrameGraphTexture> render(FEngine& engine, FrameGraph& fg, RenderPass const& pass,
FrameGraphId<FrameGraphTexture> render(FEngine& engine, FrameGraph& fg,
RenderPassBuilder const& passBuilder,
FView& view, CameraInfo const& mainCameraInfo, math::float4 const& userTime) noexcept;
// valid after calling update() above

View File

@@ -198,7 +198,7 @@ FEngine::FEngine(Engine::Builder const& builder) :
mCommandBufferQueue(
builder->mConfig.minCommandBufferSizeMB * MiB,
builder->mConfig.commandBufferSizeMB * MiB),
mPerRenderPassAllocator(
mPerRenderPassArena(
"FEngine::mPerRenderPassAllocator",
builder->mConfig.perRenderPassArenaSizeMB * MiB),
mHeapAllocator("FEngine::mHeapAllocator", AreaPolicy::NullArea{}),

View File

@@ -58,17 +58,6 @@
#include <filament/Texture.h>
#include <filament/VertexBuffer.h>
#if FILAMENT_ENABLE_MATDBG
#include <matdbg/DebugServer.h>
#else
namespace filament {
namespace matdbg {
class DebugServer;
using MaterialKey = uint32_t;
} // namespace matdbg
} // namespace filament
#endif
#include <utils/compiler.h>
#include <utils/Allocator.h>
#include <utils/JobSystem.h>
@@ -78,8 +67,19 @@ using MaterialKey = uint32_t;
#include <memory>
#include <new>
#include <random>
#include <thread>
#include <type_traits>
#include <unordered_map>
#if FILAMENT_ENABLE_MATDBG
#include <matdbg/DebugServer.h>
#else
namespace filament::matdbg {
class DebugServer;
using MaterialKey = uint32_t;
} // namespace filament::matdbg
#endif
namespace filament {
class Renderer;
@@ -142,7 +142,7 @@ public:
// the per-frame Area is used by all Renderer, so they must run in sequence and
// have freed all allocated memory when done. If this needs to change in the future,
// we'll simply have to use separate Areas (for instance).
LinearAllocatorArena& getPerRenderPassAllocator() noexcept { return mPerRenderPassAllocator; }
LinearAllocatorArena& getPerRenderPassArena() noexcept { return mPerRenderPassArena; }
// Material IDs...
uint32_t getMaterialId() const noexcept { return mMaterialId++; }
@@ -508,7 +508,7 @@ private:
uint32_t mFlushCounter = 0;
LinearAllocatorArena mPerRenderPassAllocator;
RootArenaScope::Arena mPerRenderPassArena;
HeapAllocatorArena mHeapAllocator;
utils::JobSystem mJobSystem;

View File

@@ -16,6 +16,9 @@
#include "details/Renderer.h"
#include "Allocators.h"
#include "DebugRegistry.h"
#include "FrameHistory.h"
#include "PostProcessManager.h"
#include "RendererUtils.h"
#include "RenderPass.h"
@@ -28,21 +31,40 @@
#include "details/Texture.h"
#include "details/View.h"
#include <filament/Camera.h>
#include <filament/Fence.h>
#include <filament/Options.h>
#include <filament/Renderer.h>
#include <backend/DriverEnums.h>
#include <backend/DriverApiForward.h>
#include <backend/Handle.h>
#include <backend/PixelBufferDescriptor.h>
#include "fg/FrameGraph.h"
#include "fg/FrameGraphId.h"
#include "fg/FrameGraphResources.h"
#include "fg/FrameGraphTexture.h"
#include <math/vec2.h>
#include <math/vec3.h>
#include <math/mat4.h>
#include <utils/compiler.h>
#include <utils/JobSystem.h>
#include <utils/Log.h>
#include <utils/ostream.h>
#include <utils/Panic.h>
#include <utils/Systrace.h>
#include <utils/vector.h>
#include <utils/debug.h>
#include <chrono>
#include <limits>
#include <utility>
#include <stddef.h>
#include <stdint.h>
// this helps visualize what dynamic-scaling is doing
#define DEBUG_DYNAMIC_SCALING false
@@ -62,8 +84,7 @@ FRenderer::FRenderer(FEngine& engine) :
mHdrQualityMedium(TextureFormat::R11F_G11F_B10F),
mHdrQualityHigh(TextureFormat::RGB16F),
mIsRGB8Supported(false),
mUserEpoch(engine.getEngineEpoch()),
mPerRenderPassArena(engine.getPerRenderPassAllocator())
mUserEpoch(engine.getEngineEpoch())
{
FDebugRegistry& debugRegistry = engine.getDebugRegistry();
debugRegistry.registerProperty("d.renderer.doFrameCapture",
@@ -442,7 +463,7 @@ void FRenderer::render(FView const* view) {
if (UTILS_LIKELY(view && view->getScene())) {
if (mViewRenderedCount) {
// this is a good place to kick the GPU, since we've rendered a View before,
// This is a good place to kick the GPU, since we've rendered a View before,
// and we're about to render another one.
mEngine.getDriverApi().flush();
}
@@ -452,17 +473,17 @@ void FRenderer::render(FView const* view) {
}
void FRenderer::renderInternal(FView const* view) {
// per-renderpass data
ArenaScope rootArena(mPerRenderPassArena);
FEngine& engine = mEngine;
JobSystem& js = engine.getJobSystem();
// per-renderpass data
RootArenaScope rootArenaScope(engine.getPerRenderPassArena());
// create a root job so no other job can escape
JobSystem& js = engine.getJobSystem();
auto *rootJob = js.setRootJob(js.createJob());
// execute the render pass
renderJob(rootArena, const_cast<FView&>(*view));
renderJob(rootArenaScope, const_cast<FView&>(*view));
// make sure to flush the command buffer
engine.flush();
@@ -471,7 +492,7 @@ void FRenderer::renderInternal(FView const* view) {
js.runAndWait(rootJob);
}
void FRenderer::renderJob(ArenaScope& arena, FView& view) {
void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) {
FEngine& engine = mEngine;
JobSystem& js = engine.getJobSystem();
FEngine::DriverApi& driver = engine.getDriverApi();
@@ -636,7 +657,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
xvp.bottom = int32_t(guardBand);
}
view.prepare(engine, driver, arena, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel);
view.prepare(engine, driver, rootArenaScope, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel);
view.prepareUpscaler(scale, taaOptions, dsrOptions);
@@ -649,8 +670,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
// Allocate some space for our commands in the per-frame Arena, and use that space as
// an Arena for commands. All this space is released when we exit this method.
size_t const perFrameCommandsSize = engine.getPerFrameCommandsSize();
void* const arenaBegin = arena.allocate(perFrameCommandsSize, CACHELINE_SIZE);
void* const arenaBegin = rootArenaScope.allocate(perFrameCommandsSize, CACHELINE_SIZE);
void* const arenaEnd = pointermath::add(arenaBegin, perFrameCommandsSize);
// This arena *must* stay valid until all commands have been processed
RenderPass::Arena commandArena("Command Arena", { arenaBegin, arenaEnd });
RenderPass::RenderFlags renderFlags = 0;
@@ -658,8 +681,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
if (view.isFrontFaceWindingInverted()) renderFlags |= RenderPass::HAS_INVERSE_FRONT_FACES;
if (view.hasInstancedStereo()) renderFlags |= RenderPass::IS_STEREOSCOPIC;
RenderPass pass(engine, commandArena);
pass.setRenderFlags(renderFlags);
RenderPassBuilder passBuilder(commandArena);
passBuilder.renderFlags(renderFlags);
Variant variant;
variant.setDirectionalLighting(view.hasDirectionalLight());
@@ -682,10 +705,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
if (view.needsShadowMap()) {
Variant shadowVariant(Variant::DEPTH_VARIANT);
shadowVariant.setVsm(view.getShadowType() == ShadowType::VSM);
RenderPass shadowPass(pass);
shadowPass.setVariant(shadowVariant);
auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime, shadowPass);
auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime,
RenderPassBuilder{ commandArena }
.renderFlags(renderFlags)
.variant(shadowVariant));
blackboard["shadows"] = shadows;
}
@@ -771,8 +794,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
view.updatePrimitivesLod(engine, cameraInfo,
scene.getRenderableData(), view.getVisibleRenderables());
pass.setCamera(cameraInfo);
pass.setGeometry(scene.getRenderableData(), view.getVisibleRenderables(), scene.getRenderableUBO());
passBuilder.camera(cameraInfo);
passBuilder.geometry(scene.getRenderableData(),
view.getVisibleRenderables(), scene.getRenderableUBO());
// view set-ups that need to happen before rendering
fg.addTrivialSideEffectPass("Prepare View Uniforms",
@@ -818,7 +842,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
// This is normally used by SSAO and contact-shadows
// TODO: the scaling should depend on all passes that need the structure pass
const auto [structure, picking_] = ppm.structure(fg, pass, renderFlags, svp.width, svp.height, {
const auto [structure, picking_] = ppm.structure(fg,
passBuilder, renderFlags, svp.width, svp.height, {
.scale = aoOptions.resolution,
.picking = view.hasPicking()
});
@@ -876,7 +901,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
// screen-space reflections pass
if (ssReflectionsOptions.enabled) {
auto reflections = ppm.ssr(fg, pass,
auto reflections = ppm.ssr(fg, passBuilder,
view.getFrameHistory(), cameraInfo,
view.getPerViewUniforms(),
structure,
@@ -894,10 +919,15 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
// --------------------------------------------------------------------------------------------
// Color passes
// this makes the viewport relative to xvp
// FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to
// know at this point. This will usually be the case when post-process is disabled.
// FIXME: we probably should take the dynamic scaling into account too
passBuilder.scissorViewport(hasPostProcess ? xvp : vp);
// This one doesn't need to be a FrameGraph pass because it always happens by construction
// (i.e. it won't be culled, unless everything is culled), so no need to complexify things.
pass.setVariant(variant);
pass.appendCommands(engine, RenderPass::COLOR);
passBuilder.variant(variant);
// color-grading as subpass is done either by the color pass or the TAA pass if any
auto colorGradingConfigForColor = colorGradingConfig;
@@ -905,7 +935,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
if (colorGradingConfigForColor.asSubpass) {
// append color grading subpass after all other passes
pass.appendCustomCommand(3,
passBuilder.customCommand(engine, 3,
RenderPass::Pass::BLENDED,
RenderPass::CustomCommand::EPILOG,
0, [&ppm, &driver, colorGradingConfigForColor]() {
@@ -913,7 +943,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
});
} else if (colorGradingConfig.customResolve) {
// append custom resolve subpass after all other passes
pass.appendCustomCommand(3,
passBuilder.customCommand(engine, 3,
RenderPass::Pass::BLENDED,
RenderPass::CustomCommand::EPILOG,
0, [&ppm, &driver]() {
@@ -921,16 +951,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
});
}
// sort commands once we're done adding commands
pass.sortCommands(engine);
// this makes the viewport relative to xvp
// FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to
// know at this point. This will usually be the case when post-process is disabled.
// FIXME: we probably should take the dynamic scaling into account too
pass.setScissorViewport(hasPostProcess ? xvp : vp);
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::COLOR);
RenderPass const pass{ passBuilder.build(engine) };
FrameGraphTexture::Descriptor const desc = {
.width = config.physicalViewport.width,

View File

@@ -163,7 +163,7 @@ private:
}
void renderInternal(FView const* view);
void renderJob(ArenaScope& arena, FView& view);
void renderJob(RootArenaScope& rootArenaScope, FView& view);
// keep a reference to our engine
FEngine& mEngine;
@@ -187,9 +187,6 @@ private:
backend::TargetBufferFlags mClearFlags{};
tsl::robin_set<FRenderTarget*> mPreviousRenderTargets;
std::function<void()> mBeginFrameInternal;
// per-frame arena for this Renderer
LinearAllocatorArena& mPerRenderPassArena;
};
FILAMENT_DOWNCAST(Renderer)

View File

@@ -53,7 +53,7 @@ FScene::~FScene() noexcept = default;
void FScene::prepare(utils::JobSystem& js,
LinearAllocatorArena& allocator,
RootArenaScope& rootArenaScope,
mat4 const& worldTransform,
bool shadowReceiversAreCasters) noexcept {
// TODO: can we skip this in most cases? Since we rely on indices staying the same,
@@ -64,7 +64,7 @@ void FScene::prepare(utils::JobSystem& js,
SYSTRACE_CONTEXT();
// This will reset the allocator upon exiting
ArenaScope const arena(allocator);
ArenaScope<RootArenaScope::Arena> localArenaScope(rootArenaScope.getArena());
FEngine& engine = mEngine;
EntityManager const& em = engine.getEntityManager();
@@ -85,10 +85,10 @@ void FScene::prepare(utils::JobSystem& js,
utils::STLAllocator< LightContainerData, LinearAllocatorArena >, false>;
RenderableInstanceContainer renderableInstances{
RenderableInstanceContainer::with_capacity(entities.size(), allocator) };
RenderableInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) };
LightInstanceContainer lightInstances{
LightInstanceContainer::with_capacity(entities.size(), allocator) };
LightInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) };
SYSTRACE_NAME_BEGIN("InstanceLoop");
@@ -454,7 +454,7 @@ void FScene::terminate(FEngine&) {
mRenderableViewUbh.clear();
}
void FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope&,
void FScene::prepareDynamicLights(const CameraInfo& camera,
Handle<HwBufferObject> lightUbh) noexcept {
FEngine::DriverApi& driver = mEngine.getDriverApi();
FLightManager const& lcm = mEngine.getLightManager();

View File

@@ -31,6 +31,8 @@
#include <filament/Box.h>
#include <filament/Scene.h>
#include <math/mathfwd.h>
#include <utils/compiler.h>
#include <utils/Entity.h>
#include <utils/Slice.h>
@@ -70,12 +72,12 @@ public:
~FScene() noexcept;
void terminate(FEngine& engine);
void prepare(utils::JobSystem& js, LinearAllocatorArena& allocator,
void prepare(utils::JobSystem& js, RootArenaScope& rootArenaScope,
math::mat4 const& worldTransform, bool shadowReceiversAreCasters) noexcept;
void prepareVisibleRenderables(utils::Range<uint32_t> visibleRenderables) noexcept;
void prepareDynamicLights(const CameraInfo& camera, ArenaScope& arena,
void prepareDynamicLights(const CameraInfo& camera,
backend::Handle<backend::HwBufferObject> lightUbh) noexcept;
backend::Handle<backend::HwBufferObject> getRenderableUBO() const noexcept {

View File

@@ -341,8 +341,7 @@ void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableD
mNeedsShadowMap = any(shadowTechnique & ShadowMapManager::ShadowTechnique::SHADOW_MAP);
}
void FView::prepareLighting(FEngine& engine, ArenaScope& arena,
CameraInfo const& cameraInfo) noexcept {
void FView::prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept {
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
@@ -354,7 +353,7 @@ void FView::prepareLighting(FEngine& engine, ArenaScope& arena,
*/
if (hasDynamicLighting()) {
scene->prepareDynamicLights(cameraInfo, arena, mLightUbh);
scene->prepareDynamicLights(cameraInfo, mLightUbh);
}
// here the array of visible lights has been shrunk to CONFIG_MAX_LIGHT_COUNT
@@ -427,7 +426,7 @@ CameraInfo FView::computeCameraInfo(FEngine& engine) const noexcept {
return { *camera, mat4{ rotation } * mat4::translation(translation) };
}
void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootArenaScope,
filament::Viewport viewport, CameraInfo cameraInfo,
float4 const& userTime, bool needsAlphaChannel) noexcept {
@@ -465,7 +464,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
* Gather all information needed to render this scene. Apply the world origin to all
* objects in the scene.
*/
scene->prepare(js, arena.getAllocator(),
scene->prepare(js, rootArenaScope,
cameraInfo.worldTransform,
hasVSM());
@@ -475,14 +474,22 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
JobSystem::Job* froxelizeLightsJob = nullptr;
JobSystem::Job* prepareVisibleLightsJob = nullptr;
if (scene->getLightData().size() > FScene::DIRECTIONAL_LIGHTS_COUNT) {
size_t const lightCount = scene->getLightData().size();
if (lightCount > FScene::DIRECTIONAL_LIGHTS_COUNT) {
// create and start the prepareVisibleLights job
// note: this job updates LightData (non const)
// allocate a scratch buffer for distances outside the job below, so we don't need
// to use a locked allocator; the downside is that we need to account for the worst case.
size_t const positionalLightCount = lightCount - FScene::DIRECTIONAL_LIGHTS_COUNT;
float* const distances = rootArenaScope.allocate<float>(
(positionalLightCount + 3u) & ~3u, CACHELINE_SIZE);
prepareVisibleLightsJob = js.runAndRetain(js.createJob(nullptr,
[&engine, &arena, &viewMatrix = cameraInfo.view, &cullingFrustum,
[&engine, distances, positionalLightCount, &viewMatrix = cameraInfo.view, &cullingFrustum,
&lightData = scene->getLightData()]
(JobSystem&, JobSystem::Job*) {
FView::prepareVisibleLights(engine.getLightManager(), arena,
FView::prepareVisibleLights(engine.getLightManager(),
{ distances, distances + positionalLightCount },
viewMatrix, cullingFrustum, lightData);
}));
}
@@ -530,7 +537,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
// As soon as prepareVisibleLight finishes, we can kick-off the froxelization
if (hasDynamicLighting()) {
auto& froxelizer = mFroxelizer;
if (froxelizer.prepare(driver, arena, viewport,
if (froxelizer.prepare(driver, rootArenaScope, viewport,
cameraInfo.projection, cameraInfo.zn, cameraInfo.zf)) {
// TODO: might be more consistent to do this in prepareLighting(), but it's not
// strictly necessary
@@ -645,7 +652,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
* Relies on FScene::prepare() and prepareVisibleLights()
*/
prepareLighting(engine, arena, cameraInfo);
prepareLighting(engine, cameraInfo);
/*
* Update driver state
@@ -850,7 +857,8 @@ void FView::cullRenderables(JobSystem&,
functor(0, renderableData.size());
}
void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena,
void FView::prepareVisibleLights(FLightManager const& lcm,
utils::Slice<float> scratch,
mat4f const& viewMatrix, Frustum const& frustum,
FScene::LightSoa& lightData) noexcept {
SYSTRACE_CALL();
@@ -918,28 +926,25 @@ void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena
* - This helps our limited numbers of spot-shadow as well.
*/
ArenaScope arena(rootArena.getAllocator());
size_t const size = visibleLightCount;
// number of point/spotlights
size_t const positionalLightCount = size - FScene::DIRECTIONAL_LIGHTS_COUNT;
size_t const positionalLightCount = visibleLightCount - FScene::DIRECTIONAL_LIGHTS_COUNT;
if (positionalLightCount) {
// always allocate at least 4 entries, because the vectorized loops below rely on that
float* const UTILS_RESTRICT distances =
arena.allocate<float>((size + 3u) & ~3u, CACHELINE_SIZE);
assert_invariant(positionalLightCount <= scratch.size());
// pre-compute the lights' distance to the camera, for sorting below
// - we don't skip the directional light, because we don't care, it's ignored during sorting
float* const UTILS_RESTRICT distances = scratch.data();
float4 const* const UTILS_RESTRICT spheres = lightData.data<FScene::POSITION_RADIUS>();
computeLightCameraDistances(distances, viewMatrix, spheres, size);
computeLightCameraDistances(distances, viewMatrix, spheres, visibleLightCount);
// skip directional light
Zip2Iterator<FScene::LightSoa::iterator, float*> b = { lightData.begin(), distances };
std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + size,
std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + visibleLightCount,
[](auto const& lhs, auto const& rhs) { return lhs.second < rhs.second; });
}
// drop excess lights
lightData.resize(std::min(size, CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT));
lightData.resize(std::min(visibleLightCount,
CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT));
}
// These methods need to exist so clang honors the __restrict__ keyword, which in turn
@@ -972,8 +977,9 @@ void FView::updatePrimitivesLod(FEngine& engine, const CameraInfo&,
}
FrameGraphId<FrameGraphTexture> FView::renderShadowMaps(FEngine& engine, FrameGraph& fg,
CameraInfo const& cameraInfo, float4 const& userTime, RenderPass const& pass) noexcept {
return mShadowMapManager.render(engine, fg, pass, *this, cameraInfo, userTime);
CameraInfo const& cameraInfo, float4 const& userTime,
RenderPassBuilder const& passBuilder) noexcept {
return mShadowMapManager.render(engine, fg, passBuilder, *this, cameraInfo, userTime);
}
void FView::commitFrameHistory(FEngine& engine) noexcept {

View File

@@ -88,7 +88,7 @@ public:
// note: viewport/cameraInfo are passed by value to make it clear that prepare cannot
// keep references on them that would outlive the scope of prepare() (e.g. with JobSystem).
void prepare(FEngine& engine, backend::DriverApi& driver, ArenaScope& arena,
void prepare(FEngine& engine, backend::DriverApi& driver, RootArenaScope& rootArenaScope,
filament::Viewport viewport, CameraInfo cameraInfo,
math::float4 const& userTime, bool needsAlphaChannel) noexcept;
@@ -144,7 +144,7 @@ public:
void prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableData,
FScene::LightSoa const& lightData, CameraInfo const& cameraInfo) noexcept;
void prepareLighting(FEngine& engine, ArenaScope& arena, CameraInfo const& cameraInfo) noexcept;
void prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept;
void prepareSSAO(backend::Handle<backend::HwTexture> ssao) const noexcept;
void prepareSSR(backend::Handle<backend::HwTexture> ssr, bool disableSSR,
@@ -176,7 +176,7 @@ public:
FrameGraphId<FrameGraphTexture> renderShadowMaps(FEngine& engine, FrameGraph& fg,
CameraInfo const& cameraInfo, math::float4 const& userTime,
RenderPass const& pass) noexcept;
RenderPassBuilder const& passBuilder) noexcept;
void updatePrimitivesLod(
FEngine& engine, const CameraInfo& camera,
@@ -460,7 +460,8 @@ private:
void prepareVisibleRenderables(utils::JobSystem& js,
Frustum const& frustum, FScene::RenderableSoa& renderableData) const noexcept;
static void prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena,
static void prepareVisibleLights(FLightManager const& lcm,
utils::Slice<float> scratch,
math::mat4f const& viewMatrix, Frustum const& frustum,
FScene::LightSoa& lightData) noexcept;

View File

@@ -30,6 +30,7 @@
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <vector>
namespace utils {
@@ -43,14 +44,14 @@ static inline P* add(P* a, T b) noexcept {
template <typename P>
static inline P* align(P* p, size_t alignment) noexcept {
// alignment must be a power-of-two
assert(alignment && !(alignment & alignment-1));
assert_invariant(alignment && !(alignment & alignment-1));
return (P*)((uintptr_t(p) + alignment - 1) & ~(alignment - 1));
}
template <typename P>
static inline P* align(P* p, size_t alignment, size_t offset) noexcept {
P* const r = align(add(p, offset), alignment);
assert(r >= add(p, offset));
assert_invariant(r >= add(p, offset));
return r;
}
@@ -89,20 +90,19 @@ public:
// branch-less allocation
void* const p = pointermath::align(current(), alignment, extra);
void* const c = pointermath::add(p, size);
bool success = c <= end();
bool const success = c <= end();
set_current(success ? c : current());
return success ? p : nullptr;
}
// API specific to this allocator
void *getCurrent() UTILS_RESTRICT noexcept {
return current();
}
// free memory back to the specified point
void rewind(void* p) UTILS_RESTRICT noexcept {
assert(p>=mBegin && p<end());
assert_invariant(p >= mBegin && p < end());
set_current(p);
}
@@ -122,16 +122,21 @@ public:
void swap(LinearAllocator& rhs) noexcept;
void *base() noexcept { return mBegin; }
void const *base() const noexcept { return mBegin; }
void free(void*, size_t) UTILS_RESTRICT noexcept { }
private:
protected:
void* end() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); }
void const* end() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); }
void* current() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); }
void const* current() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); }
private:
void set_current(void* p) UTILS_RESTRICT noexcept {
mCur = uint32_t(uintptr_t(p) - uintptr_t(mBegin));
}
void* mBegin = nullptr;
uint32_t mSize = 0;
uint32_t mCur = 0;
@@ -152,9 +157,7 @@ public:
explicit HeapAllocator(const AREA&) { }
// our allocator concept
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) {
// this allocator doesn't support 'extra'
assert(extra == 0);
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) {
return aligned_alloc(size, alignment);
}
@@ -171,6 +174,50 @@ public:
void swap(HeapAllocator&) noexcept { }
};
/* ------------------------------------------------------------------------------------------------
* LinearAllocatorWithFallback
*
* This is a LinearAllocator that falls back to a HeapAllocator when an allocation fails. The
* heap-allocated memory is freed only when the LinearAllocator is reset or destroyed.
* ------------------------------------------------------------------------------------------------
*/
class LinearAllocatorWithFallback : private LinearAllocator, private HeapAllocator {
public:
    // Builds the allocator over the linear range [begin, end)
    LinearAllocatorWithFallback(void* begin, void* end) noexcept
            : LinearAllocator(begin, end) {
    }

    // Builds the allocator over the range described by an AREA (anything exposing
    // begin() and end())
    template <typename AREA>
    explicit LinearAllocatorWithFallback(const AREA& area)
            : LinearAllocatorWithFallback(area.begin(), area.end()) {
    }

    // Releases whatever reset() releases, i.e. the outstanding heap fallback blocks
    ~LinearAllocatorWithFallback() noexcept {
        LinearAllocatorWithFallback::reset();
    }

    // our allocator concept
    void* alloc(size_t size, size_t alignment = alignof(std::max_align_t));

    // API specific to this allocator

    // current position of the underlying LinearAllocator
    void *getCurrent() noexcept {
        return LinearAllocator::getCurrent();
    }

    // free memory back to the specified point; a pointer that lies outside the linear
    // range (i.e. one that came from the heap fallback) is ignored
    void rewind(void* p) noexcept {
        if (isHeapAllocation(p)) {
            return;
        }
        LinearAllocator::rewind(p);
    }

    // resets the linear range and frees all heap fallback blocks
    void reset() noexcept;

    // individual frees are a no-op, memory is only reclaimed by rewind() and reset()
    void free(void*, size_t) noexcept { }

    // true when p lies outside of the linear range [base, end)
    bool isHeapAllocation(void* p) const noexcept {
        return p < LinearAllocator::base() || p >= LinearAllocator::end();
    }

private:
    // blocks obtained from the HeapAllocator, kept until the next reset()
    std::vector<void*> mHeapAllocations;
};
// ------------------------------------------------------------------------------------------------
class FreeList {
@@ -186,13 +233,13 @@ public:
Node* const head = mHead;
mHead = head ? head->next : nullptr;
// this could indicate a use after free
assert(!mHead || mHead >= mBegin && mHead < mEnd);
assert_invariant(!mHead || mHead >= mBegin && mHead < mEnd);
return head;
}
void push(void* p) noexcept {
assert(p);
assert(p >= mBegin && p < mEnd);
assert_invariant(p);
assert_invariant(p >= mBegin && p < mEnd);
// TODO: assert this is one of our pointers (i.e.: its address matches one of ours)
Node* const head = static_cast<Node*>(p);
head->next = mHead;
@@ -229,7 +276,7 @@ public:
AtomicFreeList& operator=(const FreeList& rhs) = delete;
void* pop() noexcept {
Node* const storage = mStorage;
Node* const pStorage = mStorage;
HeadPtr currentHead = mHead.load();
while (currentHead.offset >= 0) {
@@ -237,8 +284,8 @@ public:
// thread raced ahead of us. But in that case, the computed "newHead" will be discarded
// since compare_exchange_weak fails. Then this thread will loop with the updated
// value of currentHead, and try again.
Node* const next = storage[currentHead.offset].next.load(std::memory_order_relaxed);
const HeadPtr newHead{ next ? int32_t(next - storage) : -1, currentHead.tag + 1 };
Node* const pNext = pStorage[currentHead.offset].next.load(std::memory_order_relaxed);
const HeadPtr newHead{ pNext ? int32_t(pNext - pStorage) : -1, currentHead.tag + 1 };
// In the rare case that the other thread that raced ahead of us already returned the
// same mHead we just loaded, but it now has a different "next" value, the tag field will not
// match, and compare_exchange_weak will fail and prevent that particular race condition.
@@ -246,18 +293,18 @@ public:
// This assert needs to occur after we have validated that there was no race condition
// Otherwise, next might already contain application data, if another thread
// raced ahead of us after we loaded mHead, but before we loaded mHead->next.
assert(!next || next >= storage);
assert_invariant(!pNext || pNext >= pStorage);
break;
}
}
void* p = (currentHead.offset >= 0) ? (storage + currentHead.offset) : nullptr;
assert(!p || p >= storage);
void* p = (currentHead.offset >= 0) ? (pStorage + currentHead.offset) : nullptr;
assert_invariant(!p || p >= pStorage);
return p;
}
void push(void* p) noexcept {
Node* const storage = mStorage;
assert(p && p >= storage);
assert_invariant(p && p >= storage);
Node* const node = static_cast<Node*>(p);
HeadPtr currentHead = mHead.load();
HeadPtr newHead = { int32_t(node - storage), currentHead.tag + 1 };
@@ -330,9 +377,9 @@ public:
// our allocator concept
void* alloc(size_t size = ELEMENT_SIZE,
size_t alignment = ALIGNMENT, size_t offset = OFFSET) noexcept {
assert(size <= ELEMENT_SIZE);
assert(alignment <= ALIGNMENT);
assert(offset == OFFSET);
assert_invariant(size <= ELEMENT_SIZE);
assert_invariant(alignment <= ALIGNMENT);
assert_invariant(offset == OFFSET);
return mFreeList.pop();
}
@@ -587,23 +634,36 @@ public:
// allocate memory from arena with given size and alignment
// (acceptable size/alignment may depend on the allocator provided)
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) noexcept {
void* alloc(size_t size, size_t alignment, size_t extra) noexcept {
std::lock_guard<LockingPolicy> guard(mLock);
void* p = mAllocator.alloc(size, alignment, extra);
mListener.onAlloc(p, size, alignment, extra);
return p;
}
// allocate memory from arena with given size and alignment by forwarding to the allocator's
// two-argument alloc(); the listener is notified with an 'extra' of 0
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) noexcept {
    std::lock_guard<LockingPolicy> lock(mLock);
    void* const block = mAllocator.alloc(size, alignment);
    mListener.onAlloc(block, size, alignment, 0);
    return block;
}
// Allocate an array of trivially destructible objects
// for safety, we disable the object-based alloc method if the object type is not
// trivially destructible, since free() won't call the destructor and this is allocating
// an array.
template <typename T,
typename = typename std::enable_if<std::is_trivially_destructible<T>::value>::type>
T* alloc(size_t count, size_t alignment = alignof(T), size_t extra = 0) noexcept {
T* alloc(size_t count, size_t alignment, size_t extra) noexcept {
return (T*)alloc(count * sizeof(T), alignment, extra);
}
// Allocate an array of 'count' trivially destructible objects of type T, without 'extra'.
// The storage comes from the untyped alloc(size, alignment) overload and is returned
// uninitialized (no constructor is run here).
template <typename T,
        typename = typename std::enable_if<std::is_trivially_destructible<T>::value>::type>
T* alloc(size_t count, size_t alignment = alignof(T)) noexcept {
    void* const storage = alloc(count * sizeof(T), alignment);
    return static_cast<T*>(storage);
}
// return memory pointed by p to the arena
// (actual behaviour may depend on allocator provided)
void free(void* p) noexcept {
@@ -720,6 +780,8 @@ class ArenaScope {
}
public:
using Arena = ARENA;
explicit ArenaScope(ARENA& allocator)
: mArena(allocator), mRewind(allocator.getCurrent()) {
}
@@ -771,7 +833,7 @@ public:
}
// use with caution
ARENA& getAllocator() noexcept { return mArena; }
ARENA& getArena() noexcept { return mArena; }
private:
ARENA& mArena;

View File

@@ -16,6 +16,8 @@
#include <utils/Allocator.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/Log.h>
#include <algorithm>
@@ -52,6 +54,29 @@ void LinearAllocator::swap(LinearAllocator& rhs) noexcept {
std::swap(mCur, rhs.mCur);
}
// ------------------------------------------------------------------------------------------------
// LinearAllocatorWithFallback
// ------------------------------------------------------------------------------------------------
// Allocates 'size' bytes with the given alignment.
// The linear range is tried first; when it cannot satisfy the request, the block is obtained
// from the HeapAllocator instead and recorded in mHeapAllocations so that reset() can free it.
// The result is checked with assert_invariant(); where that check is compiled out, a failed
// heap allocation is returned to the caller as nullptr.
void* LinearAllocatorWithFallback::alloc(size_t size, size_t alignment) {
    void* p = LinearAllocator::alloc(size, alignment);
    if (UTILS_UNLIKELY(!p)) {
        p = HeapAllocator::alloc(size, alignment);
        // Only track real blocks: a failed heap allocation must not leave a null entry
        // behind for reset() to hand to HeapAllocator::free().
        if (p) {
            mHeapAllocations.push_back(p);
        }
    }
    assert_invariant(p);
    return p;
}
// Resets the underlying LinearAllocator, then hands every block that was obtained through the
// heap fallback back to the HeapAllocator and forgets about them.
void LinearAllocatorWithFallback::reset() noexcept {
    LinearAllocator::reset();
    for (void* const heapBlock : mHeapAllocations) {
        HeapAllocator::free(heapBlock);
    }
    mHeapAllocations.clear();
}
// ------------------------------------------------------------------------------------------------
// FreeList
// ------------------------------------------------------------------------------------------------
@@ -61,8 +86,8 @@ FreeList::Node* FreeList::init(void* begin, void* end,
{
void* const p = pointermath::align(begin, alignment, extra);
void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra);
assert(p >= begin && p < end);
assert(n >= begin && n < end && n > p);
assert_invariant(p >= begin && p < end);
assert_invariant(n >= begin && n < end && n > p);
const size_t d = uintptr_t(n) - uintptr_t(p);
const size_t num = (uintptr_t(end) - uintptr_t(p)) / d;
@@ -77,8 +102,8 @@ FreeList::Node* FreeList::init(void* begin, void* end,
cur->next = next;
cur = next;
}
assert(cur < end);
assert(pointermath::add(cur, d) <= end);
assert_invariant(cur < end);
assert_invariant(pointermath::add(cur, d) <= end);
cur->next = nullptr;
return head;
}
@@ -97,13 +122,13 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end,
{
#ifdef __ANDROID__
// On some platforms (e.g. web) this returns false. We really only care about mobile though.
assert(mHead.is_lock_free());
assert_invariant(mHead.is_lock_free());
#endif
void* const p = pointermath::align(begin, alignment, extra);
void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra);
assert(p >= begin && p < end);
assert(n >= begin && n < end && n > p);
assert_invariant(p >= begin && p < end);
assert_invariant(n >= begin && n < end && n > p);
const size_t d = uintptr_t(n) - uintptr_t(p);
const size_t num = (uintptr_t(end) - uintptr_t(p)) / d;
@@ -119,8 +144,8 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end,
cur->next = next;
cur = next;
}
assert(cur < end);
assert(pointermath::add(cur, d) <= end);
assert_invariant(cur < end);
assert_invariant(pointermath::add(cur, d) <= end);
cur->next = nullptr;
mHead.store({ int32_t(head - mStorage), 0 });
@@ -148,22 +173,25 @@ TrackingPolicy::HighWatermark::~HighWatermark() noexcept {
}
void TrackingPolicy::HighWatermark::onFree(void* p, size_t size) noexcept {
assert(mCurrent >= size);
// FIXME: this code is incorrect with LinearAllocators because free() is a no-op for them
assert_invariant(mCurrent >= size);
mCurrent -= uint32_t(size);
}
void TrackingPolicy::HighWatermark::onReset() noexcept {
// we should never be here if mBase is nullptr because compilation would have failed when
// Arena::onReset() tries to call the underlying allocator's onReset()
assert(mBase);
assert_invariant(mBase);
mCurrent = 0;
}
void TrackingPolicy::HighWatermark::onRewind(void const* addr) noexcept {
// we should never be here if mBase is nullptr because compilation would have failed when
// Arena::onRewind() tries to call the underlying allocator's onReset()
assert(mBase);
assert(addr >= mBase);
mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase));
assert_invariant(mBase);
// for LinearAllocatorWithFallback we could get pointers outside the range
if (addr >= mBase && addr < pointermath::add(mBase, mSize)) {
mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase));
}
}
// ------------------------------------------------------------------------------------------------
@@ -183,7 +211,7 @@ void TrackingPolicy::Debug::onFree(void* p, size_t size) noexcept {
void TrackingPolicy::Debug::onReset() noexcept {
// we should never be here if mBase is nullptr because compilation would have failed when
// Arena::onReset() tries to call the underlying allocator's onReset()
assert(mBase);
assert_invariant(mBase);
memset(mBase, 0xec, mSize);
}