Compare commits
5 Commits
pf/test-si
...
bjd/improv
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0404be5127 | ||
|
|
a784fb7d8b | ||
|
|
8459cbc91c | ||
|
|
344c04a2d8 | ||
|
|
55e2691f3f |
@@ -17,7 +17,10 @@
|
||||
#ifndef TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
|
||||
#define TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
|
||||
|
||||
#include <utils/debug.h>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace filament::backend {
|
||||
|
||||
@@ -37,28 +40,36 @@ public:
|
||||
|
||||
~CircularBuffer() noexcept;
|
||||
|
||||
// allocates 'size' bytes in the circular buffer and returns a pointer to the memory
|
||||
// returns the current head and moves it forward by size bytes
|
||||
inline void* allocate(size_t size) noexcept {
|
||||
static size_t getBlockSize() noexcept { return sPageSize; }
|
||||
|
||||
// Total size of circular buffer. This is a constant.
|
||||
size_t size() const noexcept { return mSize; }
|
||||
|
||||
// Allocates `s` bytes in the circular buffer and returns a pointer to the memory. All
|
||||
// allocations must not exceed size() bytes.
|
||||
inline void* allocate(size_t s) noexcept {
|
||||
// We can never allocate more than size().
|
||||
assert_invariant(getUsed() + s <= size());
|
||||
char* const cur = static_cast<char*>(mHead);
|
||||
mHead = cur + size;
|
||||
mHead = cur + s;
|
||||
return cur;
|
||||
}
|
||||
|
||||
// Total size of circular buffer
|
||||
size_t size() const noexcept { return mSize; }
|
||||
|
||||
// returns true if the buffer is empty (e.g. after calling flush)
|
||||
// Returns true if the buffer is empty, i.e.: no allocations were made since
|
||||
// calling getBuffer();
|
||||
bool empty() const noexcept { return mTail == mHead; }
|
||||
|
||||
void* getHead() const noexcept { return mHead; }
|
||||
// Returns the size used since the last call to getBuffer()
|
||||
size_t getUsed() const noexcept { return intptr_t(mHead) - intptr_t(mTail); }
|
||||
|
||||
void* getTail() const noexcept { return mTail; }
|
||||
|
||||
// call at least once every getRequiredSize() bytes allocated from the buffer
|
||||
void circularize() noexcept;
|
||||
|
||||
static size_t getBlockSize() noexcept { return sPageSize; }
|
||||
// Retrieves the current allocated range and frees it. It is the responsibility of the caller
|
||||
// to make sure the returned range is no longer in use by the time allocate() allocates
|
||||
// (size() - getUsed()) bytes.
|
||||
struct Range {
|
||||
void* tail;
|
||||
void* head;
|
||||
};
|
||||
Range getBuffer() noexcept;
|
||||
|
||||
private:
|
||||
void* alloc(size_t size) noexcept;
|
||||
@@ -66,10 +77,10 @@ private:
|
||||
|
||||
// pointer to the beginning of the circular buffer (constant)
|
||||
void* mData = nullptr;
|
||||
int mUsesAshmem = -1;
|
||||
int mAshmemFd = -1;
|
||||
|
||||
// size of the circular buffer (constant)
|
||||
size_t mSize = 0;
|
||||
size_t const mSize;
|
||||
|
||||
// pointer to the beginning of recorded data
|
||||
void* mTail = nullptr;
|
||||
|
||||
@@ -33,7 +33,7 @@ namespace filament::backend {
|
||||
* A producer-consumer command queue that uses a CircularBuffer as main storage
|
||||
*/
|
||||
class CommandBufferQueue {
|
||||
struct Slice {
|
||||
struct Range {
|
||||
void* begin;
|
||||
void* end;
|
||||
};
|
||||
@@ -46,7 +46,7 @@ class CommandBufferQueue {
|
||||
|
||||
mutable utils::Mutex mLock;
|
||||
mutable utils::Condition mCondition;
|
||||
mutable std::vector<Slice> mCommandBuffersToExecute;
|
||||
mutable std::vector<Range> mCommandBuffersToExecute;
|
||||
size_t mFreeSpace = 0;
|
||||
size_t mHighWatermark = 0;
|
||||
uint32_t mExitRequested = 0;
|
||||
@@ -58,17 +58,20 @@ public:
|
||||
CommandBufferQueue(size_t requiredSize, size_t bufferSize);
|
||||
~CommandBufferQueue();
|
||||
|
||||
CircularBuffer& getCircularBuffer() { return mCircularBuffer; }
|
||||
CircularBuffer& getCircularBuffer() noexcept { return mCircularBuffer; }
|
||||
CircularBuffer const& getCircularBuffer() const noexcept { return mCircularBuffer; }
|
||||
|
||||
size_t getCapacity() const noexcept { return mRequiredSize; }
|
||||
|
||||
size_t getHighWatermark() const noexcept { return mHighWatermark; }
|
||||
|
||||
// waits for commands to be available and returns an array containing these commands
|
||||
std::vector<Slice> waitForCommands() const;
|
||||
std::vector<Range> waitForCommands() const;
|
||||
|
||||
// return the memory used by this command buffer to the circular buffer
|
||||
// WARNING: releaseBuffer() must be called in sequence of the Slices returned by
|
||||
// waitForCommands()
|
||||
void releaseBuffer(Slice const& buffer);
|
||||
void releaseBuffer(Range const& buffer);
|
||||
|
||||
// all command buffers (Slices) written to this point are returned by waitForCommands(). This
|
||||
// call blocks until the CircularBuffer has at least mRequiredSize bytes available.
|
||||
|
||||
@@ -213,6 +213,8 @@ public:
|
||||
CommandStream(CommandStream const& rhs) noexcept = delete;
|
||||
CommandStream& operator=(CommandStream const& rhs) noexcept = delete;
|
||||
|
||||
CircularBuffer const& getCircularBuffer() const noexcept { return mCurrentBuffer; }
|
||||
|
||||
public:
|
||||
#define DECL_DRIVER_API(methodName, paramsDecl, params) \
|
||||
inline void methodName(paramsDecl) { \
|
||||
|
||||
@@ -231,7 +231,7 @@ private:
|
||||
explicit Allocator(const utils::AreaPolicy::HeapArea& area);
|
||||
|
||||
// this is in fact always called with a constexpr size argument
|
||||
[[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra) noexcept {
|
||||
[[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra = 0) noexcept {
|
||||
void* p = nullptr;
|
||||
if (size <= mPool0.getSize()) p = mPool0.alloc(size, 16, extra);
|
||||
else if (size <= mPool1.getSize()) p = mPool1.alloc(size, 16, extra);
|
||||
|
||||
@@ -16,6 +16,14 @@
|
||||
|
||||
#include "private/backend/CircularBuffer.h"
|
||||
|
||||
#include <utils/Log.h>
|
||||
#include <utils/Panic.h>
|
||||
#include <utils/architecture.h>
|
||||
#include <utils/ashmem.h>
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/debug.h>
|
||||
#include <utils/ostream.h>
|
||||
|
||||
#if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS)
|
||||
# include <sys/mman.h>
|
||||
# include <unistd.h>
|
||||
@@ -24,23 +32,20 @@
|
||||
# define HAS_MMAP 0
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <utils/architecture.h>
|
||||
#include <utils/ashmem.h>
|
||||
#include <utils/debug.h>
|
||||
#include <utils/Log.h>
|
||||
#include <utils/Panic.h>
|
||||
|
||||
using namespace utils;
|
||||
|
||||
namespace filament::backend {
|
||||
|
||||
size_t CircularBuffer::sPageSize = arch::getPageSize();
|
||||
|
||||
CircularBuffer::CircularBuffer(size_t size) {
|
||||
CircularBuffer::CircularBuffer(size_t size)
|
||||
: mSize(size) {
|
||||
mData = alloc(size);
|
||||
mSize = size;
|
||||
mTail = mData;
|
||||
mHead = mData;
|
||||
}
|
||||
@@ -85,7 +90,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
|
||||
MAP_PRIVATE, fd, (off_t)size);
|
||||
if (vaddr_guard != MAP_FAILED && (vaddr_guard == (char*)vaddr_shadow + size)) {
|
||||
// woo-hoo success!
|
||||
mUsesAshmem = fd;
|
||||
mAshmemFd = fd;
|
||||
data = vaddr;
|
||||
}
|
||||
}
|
||||
@@ -93,7 +98,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
|
||||
}
|
||||
}
|
||||
|
||||
if (UTILS_UNLIKELY(mUsesAshmem < 0)) {
|
||||
if (UTILS_UNLIKELY(mAshmemFd < 0)) {
|
||||
// ashmem failed
|
||||
if (vaddr_guard != MAP_FAILED) {
|
||||
munmap(vaddr_guard, size);
|
||||
@@ -137,9 +142,9 @@ void CircularBuffer::dealloc() noexcept {
|
||||
if (mData) {
|
||||
size_t const BLOCK_SIZE = getBlockSize();
|
||||
munmap(mData, mSize * 2 + BLOCK_SIZE);
|
||||
if (mUsesAshmem >= 0) {
|
||||
close(mUsesAshmem);
|
||||
mUsesAshmem = -1;
|
||||
if (mAshmemFd >= 0) {
|
||||
close(mAshmemFd);
|
||||
mAshmemFd = -1;
|
||||
}
|
||||
}
|
||||
#else
|
||||
@@ -149,23 +154,37 @@ void CircularBuffer::dealloc() noexcept {
|
||||
}
|
||||
|
||||
|
||||
void CircularBuffer::circularize() noexcept {
|
||||
if (mUsesAshmem > 0) {
|
||||
intptr_t const overflow = intptr_t(mHead) - (intptr_t(mData) + ssize_t(mSize));
|
||||
if (overflow >= 0) {
|
||||
assert_invariant(size_t(overflow) <= mSize);
|
||||
mHead = (void *) (intptr_t(mData) + overflow);
|
||||
#ifndef NDEBUG
|
||||
memset(mData, 0xA5, size_t(overflow));
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
// Only circularize if mHead is in the second buffer.
|
||||
if (intptr_t(mHead) - intptr_t(mData) > ssize_t(mSize)) {
|
||||
CircularBuffer::Range CircularBuffer::getBuffer() noexcept {
|
||||
Range const range{ .tail = mTail, .head = mHead };
|
||||
|
||||
char* const pData = static_cast<char*>(mData);
|
||||
char const* const pEnd = pData + mSize;
|
||||
char const* const pHead = static_cast<char const*>(mHead);
|
||||
if (UTILS_UNLIKELY(pHead >= pEnd)) {
|
||||
size_t const overflow = pHead - pEnd;
|
||||
if (UTILS_LIKELY(mAshmemFd > 0)) {
|
||||
assert_invariant(overflow <= mSize);
|
||||
mHead = static_cast<void*>(pData + overflow);
|
||||
// Data Tail End Head [virtual]
|
||||
// v v v v
|
||||
// +-------------:----+-----:--------------+
|
||||
// | : | : |
|
||||
// +-----:------------+--------------------+
|
||||
// Head |<------ copy ------>| [physical]
|
||||
} else {
|
||||
// Data Tail End Head
|
||||
// v v v v
|
||||
// +-------------:----+-----:--------------+
|
||||
// | : | : |
|
||||
// +-----|------------+-----|--------------+
|
||||
// |<---------------->|
|
||||
// sliding window
|
||||
mHead = mData;
|
||||
}
|
||||
}
|
||||
mTail = mHead;
|
||||
|
||||
return range;
|
||||
}
|
||||
|
||||
} // namespace filament::backend
|
||||
|
||||
@@ -15,14 +15,25 @@
|
||||
*/
|
||||
|
||||
#include "private/backend/CommandBufferQueue.h"
|
||||
#include "private/backend/CircularBuffer.h"
|
||||
#include "private/backend/CommandStream.h"
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/Log.h>
|
||||
#include <utils/Systrace.h>
|
||||
#include <utils/Mutex.h>
|
||||
#include <utils/ostream.h>
|
||||
#include <utils/Panic.h>
|
||||
#include <utils/Systrace.h>
|
||||
#include <utils/debug.h>
|
||||
|
||||
#include "private/backend/BackendUtils.h"
|
||||
#include "private/backend/CommandStream.h"
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
using namespace utils;
|
||||
|
||||
@@ -65,50 +76,53 @@ void CommandBufferQueue::flush() noexcept {
|
||||
// always guaranteed to have enough space for the NoopCommand
|
||||
new(circularBuffer.allocate(sizeof(NoopCommand))) NoopCommand(nullptr);
|
||||
|
||||
// end of this slice
|
||||
void* const head = circularBuffer.getHead();
|
||||
const size_t requiredSize = mRequiredSize;
|
||||
|
||||
// beginning of this slice
|
||||
void* const tail = circularBuffer.getTail();
|
||||
// get the current buffer
|
||||
auto const [begin, end] = circularBuffer.getBuffer();
|
||||
|
||||
// size of this slice
|
||||
uint32_t const used = uint32_t(intptr_t(head) - intptr_t(tail));
|
||||
assert_invariant(circularBuffer.empty());
|
||||
|
||||
circularBuffer.circularize();
|
||||
// size of the current buffer
|
||||
size_t const used = std::distance(
|
||||
static_cast<char const*>(begin), static_cast<char const*>(end));
|
||||
|
||||
std::unique_lock<utils::Mutex> lock(mLock);
|
||||
mCommandBuffersToExecute.push_back({ tail, head });
|
||||
mCommandBuffersToExecute.push_back({ begin, end });
|
||||
mCondition.notify_one();
|
||||
|
||||
// circular buffer is too small, we corrupted the stream
|
||||
ASSERT_POSTCONDITION(used <= mFreeSpace,
|
||||
"Backend CommandStream overflow. Commands are corrupted and unrecoverable.\n"
|
||||
"Please increase minCommandBufferSizeMB inside the Config passed to Engine::create.\n"
|
||||
"Space used at this time: %u bytes",
|
||||
(unsigned)used);
|
||||
"Space used at this time: %u bytes, overflow: %u bytes",
|
||||
(unsigned)used, unsigned(used - mFreeSpace));
|
||||
|
||||
// wait until there is enough space in the buffer
|
||||
mFreeSpace -= used;
|
||||
const size_t requiredSize = mRequiredSize;
|
||||
if (UTILS_UNLIKELY(mFreeSpace < requiredSize)) {
|
||||
|
||||
|
||||
#ifndef NDEBUG
|
||||
size_t totalUsed = circularBuffer.size() - mFreeSpace;
|
||||
mHighWatermark = std::max(mHighWatermark, totalUsed);
|
||||
if (UTILS_UNLIKELY(totalUsed > requiredSize)) {
|
||||
slog.d << "CommandStream used too much space: " << totalUsed
|
||||
<< ", out of " << requiredSize << " (will block)" << io::endl;
|
||||
}
|
||||
size_t const totalUsed = circularBuffer.size() - mFreeSpace;
|
||||
slog.d << "CommandStream used too much space (will block): "
|
||||
<< "needed space " << requiredSize << " out of " << mFreeSpace
|
||||
<< ", totalUsed=" << totalUsed << ", current=" << used
|
||||
<< ", queue size=" << mCommandBuffersToExecute.size() << " buffers"
|
||||
<< io::endl;
|
||||
|
||||
mHighWatermark = std::max(mHighWatermark, totalUsed);
|
||||
#endif
|
||||
|
||||
mCondition.notify_one();
|
||||
if (UTILS_LIKELY(mFreeSpace < requiredSize)) {
|
||||
SYSTRACE_NAME("waiting: CircularBuffer::flush()");
|
||||
mCondition.wait(lock, [this, requiredSize]() -> bool {
|
||||
// TODO: on macOS, we need to call pumpEvents from time to time
|
||||
return mFreeSpace >= requiredSize;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() const {
|
||||
std::vector<CommandBufferQueue::Range> CommandBufferQueue::waitForCommands() const {
|
||||
if (!UTILS_HAS_THREADING) {
|
||||
return std::move(mCommandBuffersToExecute);
|
||||
}
|
||||
@@ -123,7 +137,7 @@ std::vector<CommandBufferQueue::Slice> CommandBufferQueue::waitForCommands() con
|
||||
return std::move(mCommandBuffersToExecute);
|
||||
}
|
||||
|
||||
void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Slice const& buffer) {
|
||||
void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) {
|
||||
std::lock_guard<utils::Mutex> const lock(mLock);
|
||||
mFreeSpace += uintptr_t(buffer.end) - uintptr_t(buffer.begin);
|
||||
mCondition.notify_one();
|
||||
|
||||
@@ -54,7 +54,7 @@ using LinearAllocatorArena = utils::Arena<
|
||||
|
||||
#endif
|
||||
|
||||
using ArenaScope = utils::ArenaScope<LinearAllocatorArena>;
|
||||
using RootArenaScope = utils::ArenaScope<LinearAllocatorArena>;
|
||||
|
||||
} // namespace filament
|
||||
|
||||
|
||||
@@ -168,7 +168,8 @@ void Froxelizer::setProjection(const mat4f& projection,
|
||||
}
|
||||
|
||||
bool Froxelizer::prepare(
|
||||
FEngine::DriverApi& driverApi, ArenaScope& arena, filament::Viewport const& viewport,
|
||||
FEngine::DriverApi& driverApi, RootArenaScope& rootArenaScope,
|
||||
filament::Viewport const& viewport,
|
||||
const mat4f& projection, float projectionNear, float projectionFar) noexcept {
|
||||
setViewport(viewport);
|
||||
setProjection(projection, projectionNear, projectionFar);
|
||||
@@ -199,12 +200,12 @@ bool Froxelizer::prepare(
|
||||
|
||||
// light records per froxel (~256 KiB)
|
||||
mLightRecords = {
|
||||
arena.allocate<LightRecord>(getFroxelBufferEntryCount(), CACHELINE_SIZE),
|
||||
rootArenaScope.allocate<LightRecord>(getFroxelBufferEntryCount(), CACHELINE_SIZE),
|
||||
getFroxelBufferEntryCount() };
|
||||
|
||||
// froxel thread data (~256 KiB)
|
||||
mFroxelShardedData = {
|
||||
arena.allocate<FroxelThreadData>(GROUP_COUNT, CACHELINE_SIZE),
|
||||
rootArenaScope.allocate<FroxelThreadData>(GROUP_COUNT, CACHELINE_SIZE),
|
||||
uint32_t(GROUP_COUNT)
|
||||
};
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ public:
|
||||
*
|
||||
* return true if updateUniforms() needs to be called
|
||||
*/
|
||||
bool prepare(backend::DriverApi& driverApi, ArenaScope& arena, Viewport const& viewport,
|
||||
bool prepare(backend::DriverApi& driverApi, RootArenaScope& rootArenaScope, Viewport const& viewport,
|
||||
const math::mat4f& projection, float projectionNear, float projectionFar) noexcept;
|
||||
|
||||
Froxel getFroxelAt(size_t x, size_t y, size_t z) const noexcept;
|
||||
|
||||
@@ -414,7 +414,7 @@ void PostProcessManager::commitAndRender(FrameGraphResources::RenderPassInfo con
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph& fg,
|
||||
RenderPass const& pass, uint8_t structureRenderFlags,
|
||||
RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
|
||||
uint32_t width, uint32_t height,
|
||||
StructurePassConfig const& config) noexcept {
|
||||
|
||||
@@ -466,17 +466,19 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
|
||||
.clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::DEPTH
|
||||
});
|
||||
},
|
||||
[=, renderPass = pass](FrameGraphResources const& resources,
|
||||
[=, passBuilder = passBuilder](FrameGraphResources const& resources,
|
||||
auto const&, DriverApi&) mutable {
|
||||
Variant structureVariant(Variant::DEPTH_VARIANT);
|
||||
structureVariant.setPicking(config.picking);
|
||||
|
||||
auto out = resources.getRenderPassInfo();
|
||||
renderPass.setRenderFlags(structureRenderFlags);
|
||||
renderPass.setVariant(structureVariant);
|
||||
renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SSAO);
|
||||
renderPass.sortCommands(mEngine);
|
||||
renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
|
||||
|
||||
passBuilder.renderFlags(structureRenderFlags);
|
||||
passBuilder.variant(structureVariant);
|
||||
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SSAO);
|
||||
|
||||
RenderPass const pass{ passBuilder.build(mEngine) };
|
||||
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
|
||||
});
|
||||
|
||||
auto depth = structurePass->depth;
|
||||
@@ -523,7 +525,7 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
|
||||
RenderPass const& pass,
|
||||
RenderPassBuilder const& passBuilder,
|
||||
FrameHistory const& frameHistory,
|
||||
CameraInfo const& cameraInfo,
|
||||
PerViewUniforms& uniforms,
|
||||
@@ -586,7 +588,7 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
|
||||
},
|
||||
[this, projection = cameraInfo.projection,
|
||||
userViewMatrix = cameraInfo.getUserViewMatrix(), uvFromClipMatrix, historyProjection,
|
||||
options, &uniforms, renderPass = pass]
|
||||
options, &uniforms, passBuilder = passBuilder]
|
||||
(FrameGraphResources const& resources, auto const& data, DriverApi& driver) mutable {
|
||||
// set structure sampler
|
||||
uniforms.prepareStructure(data.structure ?
|
||||
@@ -607,17 +609,17 @@ FrameGraphId<FrameGraphTexture> PostProcessManager::ssr(FrameGraph& fg,
|
||||
auto out = resources.getRenderPassInfo();
|
||||
|
||||
// Remove the HAS_SHADOWING RenderFlags, since it's irrelevant when rendering reflections
|
||||
RenderPass::RenderFlags flags = renderPass.getRenderFlags();
|
||||
flags &= ~RenderPass::HAS_SHADOWING;
|
||||
renderPass.setRenderFlags(flags);
|
||||
passBuilder.renderFlags(~RenderPass::HAS_SHADOWING, 0);
|
||||
|
||||
// use our special SSR variant, it can only be applied to objects that have
|
||||
// the SCREEN_SPACE ReflectionMode.
|
||||
renderPass.setVariant(Variant{Variant::SPECIAL_SSR});
|
||||
passBuilder.variant(Variant{ Variant::SPECIAL_SSR });
|
||||
|
||||
// generate all our drawing commands, except blended objects.
|
||||
renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
|
||||
renderPass.sortCommands(mEngine);
|
||||
renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
|
||||
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
|
||||
|
||||
RenderPass const pass{ passBuilder.build(mEngine) };
|
||||
RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
|
||||
});
|
||||
|
||||
return ssrPass->reflections;
|
||||
|
||||
@@ -50,6 +50,7 @@ class FMaterialInstance;
|
||||
class FrameGraph;
|
||||
class PerViewUniforms;
|
||||
class RenderPass;
|
||||
class RenderPassBuilder;
|
||||
struct CameraInfo;
|
||||
|
||||
class PostProcessManager {
|
||||
@@ -99,12 +100,12 @@ public:
|
||||
FrameGraphId<FrameGraphTexture> picking;
|
||||
};
|
||||
StructurePassOutput structure(FrameGraph& fg,
|
||||
RenderPass const& pass, uint8_t structureRenderFlags,
|
||||
RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
|
||||
uint32_t width, uint32_t height, StructurePassConfig const& config) noexcept;
|
||||
|
||||
// reflections pass
|
||||
FrameGraphId<FrameGraphTexture> ssr(FrameGraph& fg,
|
||||
RenderPass const& pass,
|
||||
RenderPassBuilder const& passBuilder,
|
||||
FrameHistory const& frameHistory,
|
||||
CameraInfo const& cameraInfo,
|
||||
PerViewUniforms& uniforms,
|
||||
|
||||
@@ -19,17 +19,43 @@
|
||||
#include "RenderPrimitive.h"
|
||||
#include "ShadowMap.h"
|
||||
|
||||
#include "details/Camera.h"
|
||||
#include "details/Material.h"
|
||||
#include "details/MaterialInstance.h"
|
||||
#include "details/View.h"
|
||||
|
||||
#include "components/RenderableManager.h"
|
||||
|
||||
#include <private/filament/EngineEnums.h>
|
||||
#include <private/filament/UibStructs.h>
|
||||
#include <private/filament/Variant.h>
|
||||
|
||||
#include <filament/MaterialEnums.h>
|
||||
|
||||
#include <backend/DriverApiForward.h>
|
||||
#include <backend/DriverEnums.h>
|
||||
#include <backend/Handle.h>
|
||||
#include <backend/PipelineState.h>
|
||||
|
||||
#include "private/backend/CircularBuffer.h"
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/debug.h>
|
||||
#include <utils/JobSystem.h>
|
||||
#include <utils/Panic.h>
|
||||
#include <utils/Slice.h>
|
||||
#include <utils/Systrace.h>
|
||||
#include <utils/Range.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
using namespace utils;
|
||||
using namespace filament::math;
|
||||
|
||||
@@ -37,60 +63,103 @@ namespace filament {
|
||||
|
||||
using namespace backend;
|
||||
|
||||
RenderPass::RenderPass(FEngine& engine,
|
||||
RenderPass::Arena& arena) noexcept
|
||||
: mCommandArena(arena),
|
||||
mCustomCommands(engine.getPerRenderPassAllocator()) {
|
||||
RenderPassBuilder& RenderPassBuilder::customCommand(
|
||||
FEngine& engine,
|
||||
uint8_t channel,
|
||||
RenderPass::Pass pass,
|
||||
RenderPass::CustomCommand custom,
|
||||
uint32_t order,
|
||||
RenderPass::Executor::CustomCommandFn const& command) {
|
||||
if (!mCustomCommands.has_value()) {
|
||||
// construct the vector the first time
|
||||
mCustomCommands.emplace(engine.getPerRenderPassArena());
|
||||
}
|
||||
mCustomCommands->emplace_back(channel, pass, custom, order, command);
|
||||
return *this;
|
||||
}
|
||||
|
||||
RenderPass::RenderPass(RenderPass const& rhs) = default;
|
||||
RenderPass RenderPassBuilder::build(FEngine& engine) {
|
||||
ASSERT_POSTCONDITION(mRenderableSoa, "RenderPassBuilder::geometry() hasn't been called");
|
||||
assert_invariant(mScissorViewport.width <= std::numeric_limits<int32_t>::max());
|
||||
assert_invariant(mScissorViewport.height <= std::numeric_limits<int32_t>::max());
|
||||
return RenderPass{ engine, *this };
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept
|
||||
: mRenderableSoa(*builder.mRenderableSoa),
|
||||
mVisibleRenderables(builder.mVisibleRenderables),
|
||||
mUboHandle(builder.mUboHandle),
|
||||
mCameraPosition(builder.mCameraPosition),
|
||||
mCameraForwardVector(builder.mCameraForwardVector),
|
||||
mFlags(builder.mFlags),
|
||||
mVariant(builder.mVariant),
|
||||
mVisibilityMask(builder.mVisibilityMask),
|
||||
mScissorViewport(builder.mScissorViewport),
|
||||
mCustomCommands(engine.getPerRenderPassArena()) {
|
||||
|
||||
// compute the number of commands we need
|
||||
updateSummedPrimitiveCounts(
|
||||
const_cast<FScene::RenderableSoa&>(mRenderableSoa), mVisibleRenderables);
|
||||
|
||||
uint32_t commandCount =
|
||||
FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last);
|
||||
const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR);
|
||||
const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH);
|
||||
commandCount *= uint32_t(colorPass * 2 + depthPass);
|
||||
commandCount += 1; // for the sentinel
|
||||
|
||||
uint32_t const customCommandCount =
|
||||
builder.mCustomCommands.has_value() ? builder.mCustomCommands->size() : 0;
|
||||
|
||||
Command* const curr = builder.mArena.alloc<Command>(commandCount + customCommandCount);
|
||||
assert_invariant(curr);
|
||||
|
||||
if (UTILS_UNLIKELY(builder.mArena.getAllocator().isHeapAllocation(curr))) {
|
||||
static bool sLogOnce = true;
|
||||
if (UTILS_UNLIKELY(sLogOnce)) {
|
||||
sLogOnce = false;
|
||||
PANIC_LOG("RenderPass arena is full, using slower system heap. Please increase "
|
||||
"the appropriate constant (e.g. FILAMENT_PER_RENDER_PASS_ARENA_SIZE_IN_MB).");
|
||||
}
|
||||
}
|
||||
|
||||
mCommandBegin = curr;
|
||||
mCommandEnd = curr + commandCount + customCommandCount;
|
||||
|
||||
appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags);
|
||||
|
||||
if (builder.mCustomCommands.has_value()) {
|
||||
Command* p = curr + commandCount;
|
||||
for (auto [channel, passId, command, order, fn]: builder.mCustomCommands.value()) {
|
||||
appendCustomCommand(p++, channel, passId, command, order, fn);
|
||||
}
|
||||
}
|
||||
|
||||
// sort commands once we're done adding commands
|
||||
sortCommands(builder.mArena);
|
||||
|
||||
if (engine.isAutomaticInstancingEnabled()) {
|
||||
instanceify(engine, builder.mArena);
|
||||
}
|
||||
}
|
||||
|
||||
// this destructor is actually heavy because it inlines ~vector<>
|
||||
RenderPass::~RenderPass() noexcept = default;
|
||||
|
||||
RenderPass::Command* RenderPass::append(size_t count) noexcept {
|
||||
// this is like an "in-place" realloc(). Works only with LinearAllocator.
|
||||
Command* const curr = mCommandArena.alloc<Command>(count);
|
||||
assert_invariant(curr);
|
||||
assert_invariant(mCommandBegin == nullptr || curr == mCommandEnd);
|
||||
if (mCommandBegin == nullptr) {
|
||||
mCommandBegin = mCommandEnd = curr;
|
||||
}
|
||||
mCommandEnd += count;
|
||||
return curr;
|
||||
}
|
||||
|
||||
void RenderPass::resize(size_t count) noexcept {
|
||||
void RenderPass::resize(Arena& arena, size_t count) noexcept {
|
||||
if (mCommandBegin) {
|
||||
mCommandEnd = mCommandBegin + count;
|
||||
mCommandArena.rewind(mCommandEnd);
|
||||
arena.rewind(mCommandEnd);
|
||||
}
|
||||
}
|
||||
|
||||
void RenderPass::setGeometry(FScene::RenderableSoa const& soa, Range<uint32_t> vr,
|
||||
backend::Handle<backend::HwBufferObject> uboHandle) noexcept {
|
||||
mRenderableSoa = &soa;
|
||||
mVisibleRenderables = vr;
|
||||
mUboHandle = uboHandle;
|
||||
}
|
||||
|
||||
void RenderPass::setCamera(const CameraInfo& camera) noexcept {
|
||||
mCameraPosition = camera.getPosition();
|
||||
mCameraForwardVector = camera.getForwardVector();
|
||||
}
|
||||
|
||||
void RenderPass::setScissorViewport(backend::Viewport viewport) noexcept {
|
||||
assert_invariant(viewport.width <= std::numeric_limits<int32_t>::max());
|
||||
assert_invariant(viewport.height <= std::numeric_limits<int32_t>::max());
|
||||
mScissorViewport = viewport;
|
||||
}
|
||||
|
||||
void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandTypeFlags) noexcept {
|
||||
void RenderPass::appendCommands(FEngine& engine,
|
||||
Slice<Command> commands, CommandTypeFlags const commandTypeFlags) noexcept {
|
||||
SYSTRACE_CALL();
|
||||
SYSTRACE_CONTEXT();
|
||||
|
||||
assert_invariant(mRenderableSoa);
|
||||
|
||||
utils::Range<uint32_t> const vr = mVisibleRenderables;
|
||||
// trace the number of visible renderables
|
||||
SYSTRACE_VALUE32("visibleRenderables", vr.size());
|
||||
@@ -104,17 +173,10 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
|
||||
const FScene::VisibleMaskType visibilityMask = mVisibilityMask;
|
||||
|
||||
// up-to-date summed primitive counts needed for generateCommands()
|
||||
FScene::RenderableSoa const& soa = *mRenderableSoa;
|
||||
updateSummedPrimitiveCounts(const_cast<FScene::RenderableSoa&>(soa), vr);
|
||||
FScene::RenderableSoa const& soa = mRenderableSoa;
|
||||
|
||||
// compute how much maximum storage we need for this pass
|
||||
uint32_t commandCount = FScene::getPrimitiveCount(soa, vr.last);
|
||||
// double the color pass for transparent objects that need to render twice
|
||||
const bool colorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR);
|
||||
const bool depthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH);
|
||||
commandCount *= uint32_t(colorPass * 2 + depthPass);
|
||||
commandCount += 1; // for the sentinel
|
||||
Command* const curr = append(commandCount);
|
||||
Command* curr = commands.data();
|
||||
size_t const commandCount = commands.size();
|
||||
|
||||
auto stereoscopicEyeCount =
|
||||
renderFlags & IS_STEREOSCOPIC ? engine.getConfig().stereoscopicEyeCount : 1;
|
||||
@@ -152,7 +214,8 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
|
||||
}
|
||||
}
|
||||
|
||||
void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
|
||||
void RenderPass::appendCustomCommand(Command* commands,
|
||||
uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
|
||||
Executor::CustomCommandFn command) {
|
||||
|
||||
assert_invariant((uint64_t(order) << CUSTOM_ORDER_SHIFT) <= CUSTOM_ORDER_MASK);
|
||||
@@ -168,11 +231,10 @@ void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand c
|
||||
cmd |= uint64_t(order) << CUSTOM_ORDER_SHIFT;
|
||||
cmd |= uint64_t(index);
|
||||
|
||||
Command* const curr = append(1);
|
||||
curr->key = cmd;
|
||||
commands->key = cmd;
|
||||
}
|
||||
|
||||
void RenderPass::sortCommands(FEngine& engine) noexcept {
|
||||
void RenderPass::sortCommands(Arena& arena) noexcept {
|
||||
SYSTRACE_NAME("sort and trim commands");
|
||||
|
||||
std::sort(mCommandBegin, mCommandEnd);
|
||||
@@ -183,30 +245,20 @@ void RenderPass::sortCommands(FEngine& engine) noexcept {
|
||||
return c.key != uint64_t(Pass::SENTINEL);
|
||||
});
|
||||
|
||||
resize(uint32_t(last - mCommandBegin));
|
||||
|
||||
if (engine.isAutomaticInstancingEnabled()) {
|
||||
instanceify(engine);
|
||||
}
|
||||
resize(arena, uint32_t(last - mCommandBegin));
|
||||
}
|
||||
|
||||
void RenderPass::execute(FEngine& engine, const char* name,
|
||||
void RenderPass::execute(RenderPass const& pass,
|
||||
FEngine& engine, const char* name,
|
||||
backend::Handle<backend::HwRenderTarget> renderTarget,
|
||||
backend::RenderPassParams params) const noexcept {
|
||||
|
||||
backend::RenderPassParams params) noexcept {
|
||||
DriverApi& driver = engine.getDriverApi();
|
||||
|
||||
// this is a good time to flush the CommandStream, because we're about to potentially
|
||||
// output a lot of commands. This guarantees here that we have at least
|
||||
// FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default).
|
||||
engine.flush();
|
||||
|
||||
driver.beginRenderPass(renderTarget, params);
|
||||
getExecutor().execute(engine, name);
|
||||
pass.getExecutor().execute(engine, name);
|
||||
driver.endRenderPass();
|
||||
}
|
||||
|
||||
void RenderPass::instanceify(FEngine& engine) noexcept {
|
||||
void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept {
|
||||
SYSTRACE_NAME("instanceify");
|
||||
|
||||
// instanceify works by scanning the **sorted** command stream, looking for repeat draw
|
||||
@@ -262,7 +314,8 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
|
||||
// buffer large enough for all instances data
|
||||
stagingBufferSize = sizeof(PerRenderableData) * (last - curr);
|
||||
stagingBuffer = (PerRenderableData*)::malloc(stagingBufferSize);
|
||||
uboData = mRenderableSoa->data<FScene::UBO>();
|
||||
uboData = mRenderableSoa.data<FScene::UBO>();
|
||||
assert_invariant(uboData);
|
||||
}
|
||||
|
||||
// copy the ubo data to a staging buffer
|
||||
@@ -315,7 +368,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
|
||||
return command.key == uint64_t(Pass::SENTINEL);
|
||||
});
|
||||
|
||||
resize(uint32_t(lastCommand - mCommandBegin));
|
||||
resize(arena, uint32_t(lastCommand - mCommandBegin));
|
||||
}
|
||||
|
||||
assert_invariant(stagingBuffer == nullptr);
|
||||
@@ -323,7 +376,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
|
||||
|
||||
|
||||
/* static */
|
||||
UTILS_ALWAYS_INLINE // this function exists only to make the code more readable. we want it inlined.
|
||||
UTILS_ALWAYS_INLINE // This function exists only to make the code more readable. we want it inlined.
|
||||
inline // and we don't need it in the compilation unit
|
||||
void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
|
||||
FMaterialInstance const* const UTILS_RESTRICT mi, bool inverseFrontFaces) noexcept {
|
||||
@@ -374,7 +427,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
|
||||
|
||||
/* static */
|
||||
UTILS_NOINLINE
|
||||
void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const commands,
|
||||
void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands,
|
||||
FScene::RenderableSoa const& soa, Range<uint32_t> range,
|
||||
Variant variant, RenderFlags renderFlags,
|
||||
FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward,
|
||||
@@ -432,9 +485,9 @@ void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const comm
|
||||
}
|
||||
|
||||
/* static */
|
||||
template<uint32_t commandTypeFlags>
|
||||
template<RenderPass::CommandTypeFlags commandTypeFlags>
|
||||
UTILS_NOINLINE
|
||||
RenderPass::Command* RenderPass::generateCommandsImpl(uint32_t extraFlags,
|
||||
RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags,
|
||||
Command* UTILS_RESTRICT curr,
|
||||
FScene::RenderableSoa const& UTILS_RESTRICT soa, Range<uint32_t> range,
|
||||
Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask,
|
||||
@@ -737,13 +790,13 @@ void RenderPass::updateSummedPrimitiveCounts(
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
void RenderPass::Executor::overridePolygonOffset(backend::PolygonOffset const* polygonOffset) noexcept {
|
||||
if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) {
|
||||
if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) { // NOLINT(*-assignment-in-if-condition)
|
||||
mPolygonOffset = *polygonOffset;
|
||||
}
|
||||
}
|
||||
|
||||
void RenderPass::Executor::overrideScissor(backend::Viewport const* scissor) noexcept {
|
||||
if ((mScissorOverride = (scissor != nullptr))) {
|
||||
if ((mScissorOverride = (scissor != nullptr))) { // NOLINT(*-assignment-in-if-condition)
|
||||
mScissor = *scissor;
|
||||
}
|
||||
}
|
||||
@@ -754,15 +807,20 @@ void RenderPass::Executor::overrideScissor(backend::Viewport const& scissor) noe
|
||||
}
|
||||
|
||||
void RenderPass::Executor::execute(FEngine& engine, const char*) const noexcept {
|
||||
execute(engine.getDriverApi(), mCommands.begin(), mCommands.end());
|
||||
execute(engine, mCommands.begin(), mCommands.end());
|
||||
}
|
||||
|
||||
UTILS_NOINLINE // no need to be inlined
|
||||
void RenderPass::Executor::execute(backend::DriverApi& driver,
|
||||
void RenderPass::Executor::execute(FEngine& engine,
|
||||
const Command* first, const Command* last) const noexcept {
|
||||
|
||||
SYSTRACE_CALL();
|
||||
SYSTRACE_CONTEXT();
|
||||
|
||||
DriverApi& driver = engine.getDriverApi();
|
||||
size_t const capacity = engine.getMinCommandBufferSize();
|
||||
CircularBuffer const& circularBuffer = driver.getCircularBuffer();
|
||||
|
||||
if (first != last) {
|
||||
SYSTRACE_VALUE32("commandCount", last - first);
|
||||
|
||||
@@ -781,126 +839,163 @@ void RenderPass::Executor::execute(backend::DriverApi& driver,
|
||||
FMaterial const* UTILS_RESTRICT ma = nullptr;
|
||||
auto const* UTILS_RESTRICT pCustomCommands = mCustomCommands.data();
|
||||
|
||||
first--;
|
||||
while (++first != last) {
|
||||
assert_invariant(first->key != uint64_t(Pass::SENTINEL));
|
||||
// Maximum space occupied in the CircularBuffer by a single `Command`. This must be
|
||||
// reevaluated when the inner loop below adds DriverApi commands or when we change the
|
||||
// CommandStream protocol. Currently, the maximum is 240 bytes, and we use 256 to be on
|
||||
// the safer side.
|
||||
size_t const maxCommandSizeInBytes = 256;
|
||||
|
||||
/*
|
||||
* Be careful when changing code below, this is the hot inner-loop
|
||||
*/
|
||||
// Number of Commands that can be issued and guaranteed to fit in the current
|
||||
// CircularBuffer allocation. In practice, we'll have tons of headroom especially if
|
||||
// skinning and morphing aren't used. With a 2 MiB buffer (the default) a batch is
|
||||
// 8192 commands (i.e. draw calls).
|
||||
size_t const batchCommandCount = capacity / maxCommandSizeInBytes;
|
||||
while(first != last) {
|
||||
Command const* const batchLast = std::min(first + batchCommandCount, last);
|
||||
|
||||
if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
|
||||
mi = nullptr; // custom command could change the currently bound MaterialInstance
|
||||
uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
|
||||
assert_invariant(index < mCustomCommands.size());
|
||||
pCustomCommands[index]();
|
||||
continue;
|
||||
// actual number of commands we need to write (can be smaller than batchCommandCount)
|
||||
size_t const commandCount = batchLast - first;
|
||||
size_t const commandSizeInBytes = commandCount * maxCommandSizeInBytes;
|
||||
|
||||
// check we have enough capacity to write these commandCount commands, if not,
|
||||
// request a new CircularBuffer allocation of `capacity` bytes.
|
||||
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity - commandSizeInBytes)) {
|
||||
engine.flush(); // TODO: we should use a "fast" flush if possible
|
||||
}
|
||||
|
||||
// primitiveHandle may be invalid if no geometry was set on the renderable.
|
||||
if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
|
||||
continue;
|
||||
}
|
||||
first--;
|
||||
while (++first != batchLast) {
|
||||
assert_invariant(first->key != uint64_t(Pass::SENTINEL));
|
||||
|
||||
// per-renderable uniform
|
||||
const PrimitiveInfo info = first->primitive;
|
||||
pipeline.rasterState = info.rasterState;
|
||||
/*
|
||||
* Be careful when changing code below, this is the hot inner-loop
|
||||
*/
|
||||
|
||||
if (UTILS_UNLIKELY(mi != info.mi)) {
|
||||
// this is always taken the first time
|
||||
mi = info.mi;
|
||||
ma = mi->getMaterial();
|
||||
|
||||
auto const& scissor = mi->getScissor();
|
||||
if (UTILS_UNLIKELY(mi->hasScissor())) {
|
||||
// scissor is set, we need to apply the offset/clip
|
||||
// clang vectorizes this!
|
||||
constexpr int32_t maxvali = std::numeric_limits<int32_t>::max();
|
||||
const backend::Viewport scissorViewport = mScissorViewport;
|
||||
// compute new left/bottom, assume no overflow
|
||||
int32_t l = scissor.left + scissorViewport.left;
|
||||
int32_t b = scissor.bottom + scissorViewport.bottom;
|
||||
// compute right/top without overflowing, scissor.width/height guaranteed
|
||||
// to convert to int32
|
||||
int32_t r = (l > maxvali - int32_t(scissor.width)) ?
|
||||
maxvali : l + int32_t(scissor.width);
|
||||
int32_t t = (b > maxvali - int32_t(scissor.height)) ?
|
||||
maxvali : b + int32_t(scissor.height);
|
||||
// clip to the viewport
|
||||
l = std::max(l, scissorViewport.left);
|
||||
b = std::max(b, scissorViewport.bottom);
|
||||
r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
|
||||
t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
|
||||
assert_invariant(r >= l && t >= b);
|
||||
*pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
|
||||
} else {
|
||||
// no scissor set (common case), 'scissor' has its default value, use that.
|
||||
*pScissor = scissor;
|
||||
if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
|
||||
mi = nullptr; // custom command could change the currently bound MaterialInstance
|
||||
uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
|
||||
assert_invariant(index < mCustomCommands.size());
|
||||
pCustomCommands[index]();
|
||||
continue;
|
||||
}
|
||||
|
||||
*pPipelinePolygonOffset = mi->getPolygonOffset();
|
||||
pipeline.stencilState = mi->getStencilState();
|
||||
mi->use(driver);
|
||||
}
|
||||
|
||||
pipeline.program = ma->getProgram(info.materialVariant);
|
||||
|
||||
uint16_t const instanceCount = info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
|
||||
auto getPerObjectUboHandle =
|
||||
[this, &info, &instanceCount]() -> std::pair<Handle<backend::HwBufferObject>, uint32_t> {
|
||||
if (info.instanceBufferHandle) {
|
||||
// "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
|
||||
return { info.instanceBufferHandle, 0 };
|
||||
// primitiveHandle may be invalid if no geometry was set on the renderable.
|
||||
if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
|
||||
continue;
|
||||
}
|
||||
bool const userInstancing =
|
||||
(info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
|
||||
if (!userInstancing && instanceCount > 1) {
|
||||
// automatic instancing
|
||||
return { mInstancedUboHandle, info.index * sizeof(PerRenderableData) };
|
||||
} else {
|
||||
// manual instancing
|
||||
return { mUboHandle, info.index * sizeof(PerRenderableData) };
|
||||
|
||||
// per-renderable uniform
|
||||
const PrimitiveInfo info = first->primitive;
|
||||
pipeline.rasterState = info.rasterState;
|
||||
|
||||
if (UTILS_UNLIKELY(mi != info.mi)) {
|
||||
// this is always taken the first time
|
||||
mi = info.mi;
|
||||
assert_invariant(mi);
|
||||
|
||||
ma = mi->getMaterial();
|
||||
|
||||
auto const& scissor = mi->getScissor();
|
||||
if (UTILS_UNLIKELY(mi->hasScissor())) {
|
||||
// scissor is set, we need to apply the offset/clip
|
||||
// clang vectorizes this!
|
||||
constexpr int32_t maxvali = std::numeric_limits<int32_t>::max();
|
||||
const backend::Viewport scissorViewport = mScissorViewport;
|
||||
// compute new left/bottom, assume no overflow
|
||||
int32_t l = scissor.left + scissorViewport.left;
|
||||
int32_t b = scissor.bottom + scissorViewport.bottom;
|
||||
// compute right/top without overflowing, scissor.width/height guaranteed
|
||||
// to convert to int32
|
||||
int32_t r = (l > maxvali - int32_t(scissor.width)) ?
|
||||
maxvali : l + int32_t(scissor.width);
|
||||
int32_t t = (b > maxvali - int32_t(scissor.height)) ?
|
||||
maxvali : b + int32_t(scissor.height);
|
||||
// clip to the viewport
|
||||
l = std::max(l, scissorViewport.left);
|
||||
b = std::max(b, scissorViewport.bottom);
|
||||
r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
|
||||
t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
|
||||
assert_invariant(r >= l && t >= b);
|
||||
*pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
|
||||
} else {
|
||||
// no scissor set (common case), 'scissor' has its default value, use that.
|
||||
*pScissor = scissor;
|
||||
}
|
||||
|
||||
*pPipelinePolygonOffset = mi->getPolygonOffset();
|
||||
pipeline.stencilState = mi->getStencilState();
|
||||
mi->use(driver);
|
||||
}
|
||||
};
|
||||
|
||||
// bind per-renderable uniform block. there is no need to attempt to skip this command
|
||||
// because the backends already do this.
|
||||
auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
|
||||
assert_invariant(perObjectUboHandle);
|
||||
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
|
||||
+UniformBindingPoints::PER_RENDERABLE,
|
||||
perObjectUboHandle,
|
||||
offset,
|
||||
sizeof(PerRenderableUib));
|
||||
assert_invariant(ma);
|
||||
pipeline.program = ma->getProgram(info.materialVariant);
|
||||
|
||||
if (UTILS_UNLIKELY(info.skinningHandle)) {
|
||||
// note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
|
||||
uint16_t const instanceCount =
|
||||
info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
|
||||
auto getPerObjectUboHandle =
|
||||
[this, &info, &instanceCount]() -> std::pair<Handle<backend::HwBufferObject>, uint32_t> {
|
||||
if (info.instanceBufferHandle) {
|
||||
// "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
|
||||
return { info.instanceBufferHandle, 0 };
|
||||
}
|
||||
bool const userInstancing =
|
||||
(info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
|
||||
if (!userInstancing && instanceCount > 1) {
|
||||
// automatic instancing
|
||||
return {
|
||||
mInstancedUboHandle,
|
||||
info.index * sizeof(PerRenderableData) };
|
||||
} else {
|
||||
// manual instancing
|
||||
return { mUboHandle, info.index * sizeof(PerRenderableData) };
|
||||
}
|
||||
};
|
||||
|
||||
// Bind per-renderable uniform block. There is no need to attempt to skip this command
|
||||
// because the backends already do this.
|
||||
auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
|
||||
assert_invariant(perObjectUboHandle);
|
||||
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
|
||||
+UniformBindingPoints::PER_RENDERABLE_BONES,
|
||||
info.skinningHandle,
|
||||
info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
|
||||
sizeof(PerRenderableBoneUib));
|
||||
// note: always bind the skinningTexture because the shader needs it.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
|
||||
info.skinningTexture);
|
||||
// note: even if only skinning is enabled, binding morphTargetBuffer is needed.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphTargetBuffer);
|
||||
}
|
||||
+UniformBindingPoints::PER_RENDERABLE,
|
||||
perObjectUboHandle,
|
||||
offset,
|
||||
sizeof(PerRenderableUib));
|
||||
|
||||
if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
|
||||
// Instead of using a UBO per primitive, we could also have a single UBO for all
|
||||
// primitives and use bindUniformBufferRange which might be more efficient.
|
||||
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphWeightBuffer);
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphTargetBuffer);
|
||||
// note: even if only morphing is enabled, binding skinningTexture is needed.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
|
||||
info.skinningTexture);
|
||||
if (UTILS_UNLIKELY(info.skinningHandle)) {
|
||||
// note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
|
||||
driver.bindBufferRange(BufferObjectBinding::UNIFORM,
|
||||
+UniformBindingPoints::PER_RENDERABLE_BONES,
|
||||
info.skinningHandle,
|
||||
info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
|
||||
sizeof(PerRenderableBoneUib));
|
||||
// note: always bind the skinningTexture because the shader needs it.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
|
||||
info.skinningTexture);
|
||||
// note: even if only skinning is enabled, binding morphTargetBuffer is needed.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphTargetBuffer);
|
||||
}
|
||||
|
||||
if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
|
||||
// Instead of using a UBO per primitive, we could also have a single UBO for all
|
||||
// primitives and use bindUniformBufferRange which might be more efficient.
|
||||
driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphWeightBuffer);
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
|
||||
info.morphTargetBuffer);
|
||||
// note: even if only morphing is enabled, binding skinningTexture is needed.
|
||||
driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
|
||||
info.skinningTexture);
|
||||
}
|
||||
|
||||
driver.draw(pipeline, info.primitiveHandle, instanceCount);
|
||||
}
|
||||
}
|
||||
|
||||
driver.draw(pipeline, info.primitiveHandle, instanceCount);
|
||||
// If the remaining space is less than half the capacity, we flush right away to
|
||||
// allow some headroom for commands that might come later.
|
||||
if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity / 2)) {
|
||||
engine.flush();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,26 +22,38 @@
|
||||
#include "details/Camera.h"
|
||||
#include "details/Scene.h"
|
||||
|
||||
#include "backend/DriverApiForward.h"
|
||||
|
||||
#include <private/filament/Variant.h>
|
||||
#include "private/filament/Variant.h"
|
||||
#include "utils/BitmaskEnum.h"
|
||||
|
||||
#include <backend/DriverEnums.h>
|
||||
#include <backend/Handle.h>
|
||||
|
||||
#include <utils/Allocator.h>
|
||||
#include <utils/Range.h>
|
||||
#include <utils/Slice.h>
|
||||
#include <utils/architecture.h>
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/debug.h>
|
||||
|
||||
#include <math/mathfwd.h>
|
||||
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace filament {
|
||||
|
||||
namespace backend {
|
||||
class CommandBufferQueue;
|
||||
}
|
||||
|
||||
class FMaterialInstance;
|
||||
class RenderPassBuilder;
|
||||
|
||||
class RenderPass {
|
||||
public:
|
||||
@@ -171,7 +183,7 @@ public:
|
||||
EPILOG = uint64_t(0x2) << CUSTOM_SHIFT
|
||||
};
|
||||
|
||||
enum CommandTypeFlags : uint8_t {
|
||||
enum class CommandTypeFlags : uint32_t {
|
||||
COLOR = 0x1, // generate the color pass only
|
||||
DEPTH = 0x2, // generate the depth pass only ( e.g. shadowmap)
|
||||
|
||||
@@ -191,7 +203,6 @@ public:
|
||||
SCREEN_SPACE_REFLECTIONS = COLOR | FILTER_TRANSLUCENT_OBJECTS
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* The sorting material key is 32 bits and encoded as:
|
||||
*
|
||||
@@ -240,7 +251,6 @@ public:
|
||||
uint32_t skinningOffset = 0; // 4 bytes
|
||||
uint16_t instanceCount; // 2 bytes [MSb: user]
|
||||
Variant materialVariant; // 1 byte
|
||||
// uint8_t reserved[0] = {}; // 0 bytes
|
||||
|
||||
static const uint16_t USER_INSTANCE_MASK = 0x8000u;
|
||||
static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu;
|
||||
@@ -253,7 +263,7 @@ public:
|
||||
uint64_t reserved[1] = {}; // 8 bytes
|
||||
bool operator < (Command const& rhs) const noexcept { return key < rhs.key; }
|
||||
// placement new declared as "throw" to avoid the compiler's null-check
|
||||
inline void* operator new (std::size_t, void* ptr) {
|
||||
inline void* operator new (size_t, void* ptr) {
|
||||
assert_invariant(ptr);
|
||||
return ptr;
|
||||
}
|
||||
@@ -269,61 +279,31 @@ public:
|
||||
|
||||
// Arena used for commands
|
||||
using Arena = utils::Arena<
|
||||
utils::LinearAllocator, // note: can't change this allocator
|
||||
utils::LinearAllocatorWithFallback,
|
||||
utils::LockingPolicy::NoLock,
|
||||
utils::TrackingPolicy::HighWatermark,
|
||||
utils::AreaPolicy::StaticArea>;
|
||||
|
||||
/*
|
||||
* Create a RenderPass.
|
||||
* The Arena is used to allocate commands which are then owned by the Arena.
|
||||
*/
|
||||
RenderPass(FEngine& engine, Arena& arena) noexcept;
|
||||
// RenderPass can only be moved
|
||||
RenderPass(RenderPass&& rhs) = default;
|
||||
|
||||
// Copy the RenderPass as is. This can be used to create a RenderPass from a "template"
|
||||
// by copying from an "empty" RenderPass.
|
||||
RenderPass(RenderPass const& rhs);
|
||||
// RenderPass can't be copied
|
||||
RenderPass(RenderPass const& rhs) = delete;
|
||||
RenderPass& operator=(RenderPass const& rhs) = delete;
|
||||
RenderPass& operator=(RenderPass&& rhs) = delete;
|
||||
|
||||
// allocated commands ARE NOT freed, they're owned by the Arena
|
||||
~RenderPass() noexcept;
|
||||
|
||||
// a box that both offsets the viewport and clips it
|
||||
void setScissorViewport(backend::Viewport viewport) noexcept;
|
||||
|
||||
// specifies the geometry to generate commands for
|
||||
void setGeometry(FScene::RenderableSoa const& soa, utils::Range<uint32_t> vr,
|
||||
backend::Handle<backend::HwBufferObject> uboHandle) noexcept;
|
||||
|
||||
// specifies camera information (e.g. used for sorting commands)
|
||||
void setCamera(const CameraInfo& camera) noexcept;
|
||||
|
||||
// flags controlling how commands are generated
|
||||
void setRenderFlags(RenderFlags flags) noexcept { mFlags = flags; }
|
||||
RenderFlags getRenderFlags() const noexcept { return mFlags; }
|
||||
|
||||
// variant to use
|
||||
void setVariant(Variant variant) noexcept { mVariant = variant; }
|
||||
|
||||
// Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to determine
|
||||
// if the renderable is visible for this pass.
|
||||
// Defaults to all 1's, which means all renderables in this render pass will be rendered.
|
||||
void setVisibilityMask(FScene::VisibleMaskType mask) noexcept { mVisibilityMask = mask; }
|
||||
|
||||
Command const* begin() const noexcept { return mCommandBegin; }
|
||||
Command const* end() const noexcept { return mCommandEnd; }
|
||||
bool empty() const noexcept { return begin() == end(); }
|
||||
|
||||
// This is the main function of this class, this appends commands to the pass using
|
||||
// the current camera, geometry and flags set. This can be called multiple times if needed.
|
||||
void appendCommands(FEngine& engine, CommandTypeFlags commandTypeFlags) noexcept;
|
||||
|
||||
// sorts and instanceify commands then trims sentinels
|
||||
void sortCommands(FEngine& engine) noexcept;
|
||||
|
||||
// Helper to execute all the commands generated by this RenderPass
|
||||
void execute(FEngine& engine, const char* name,
|
||||
static void execute(RenderPass const& pass,
|
||||
FEngine& engine, const char* name,
|
||||
backend::Handle<backend::HwRenderTarget> renderTarget,
|
||||
backend::RenderPassParams params) const noexcept;
|
||||
backend::RenderPassParams params) noexcept;
|
||||
|
||||
/*
|
||||
* Executor holds the range of commands to execute for a given pass
|
||||
@@ -331,6 +311,7 @@ public:
|
||||
class Executor {
|
||||
using CustomCommandFn = std::function<void()>;
|
||||
friend class RenderPass;
|
||||
friend class RenderPassBuilder;
|
||||
|
||||
// these fields are constant after creation
|
||||
utils::Slice<Command> mCommands;
|
||||
@@ -346,8 +327,7 @@ public:
|
||||
|
||||
Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept;
|
||||
|
||||
void execute(backend::DriverApi& driver,
|
||||
const Command* first, const Command* last) const noexcept;
|
||||
void execute(FEngine& engine, const Command* first, const Command* last) const noexcept;
|
||||
|
||||
public:
|
||||
Executor() = default;
|
||||
@@ -366,37 +346,39 @@ public:
|
||||
};
|
||||
|
||||
// returns a new executor for this pass
|
||||
Executor getExecutor() {
|
||||
return { this, mCommandBegin, mCommandEnd };
|
||||
}
|
||||
|
||||
Executor getExecutor() const {
|
||||
return { this, mCommandBegin, mCommandEnd };
|
||||
}
|
||||
|
||||
// returns a new executor for this pass with a custom range
|
||||
Executor getExecutor(Command const* b, Command const* e) {
|
||||
return { this, b, e };
|
||||
}
|
||||
|
||||
Executor getExecutor(Command const* b, Command const* e) const {
|
||||
return { this, b, e };
|
||||
}
|
||||
|
||||
// Appends a custom command.
|
||||
void appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
|
||||
Executor::CustomCommandFn command);
|
||||
|
||||
|
||||
private:
|
||||
friend class FRenderer;
|
||||
friend class RenderPassBuilder;
|
||||
RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept;
|
||||
|
||||
Command* append(size_t count) noexcept;
|
||||
void resize(size_t count) noexcept;
|
||||
void instanceify(FEngine& engine) noexcept;
|
||||
// This is the main function of this class, this appends commands to the pass using
|
||||
// the current camera, geometry and flags set. This can be called multiple times if needed.
|
||||
void appendCommands(FEngine& engine,
|
||||
utils::Slice<Command> commands, CommandTypeFlags commandTypeFlags) noexcept;
|
||||
|
||||
// we choose the command count per job to minimize JobSystem overhead.
|
||||
// on a Pixel 4, 2048 commands is about half a millisecond of processing.
|
||||
// Appends a custom command.
|
||||
void appendCustomCommand(Command* commands,
|
||||
uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
|
||||
Executor::CustomCommandFn command);
|
||||
|
||||
void resize(Arena& arena, size_t count) noexcept;
|
||||
|
||||
// sorts commands then trims sentinels
|
||||
void sortCommands(Arena& arena) noexcept;
|
||||
|
||||
// instanceify commands then trims sentinels
|
||||
void instanceify(FEngine& engine, Arena& arena) noexcept;
|
||||
|
||||
// We choose the command count per job to minimize JobSystem overhead.
|
||||
// On a Pixel 4, 2048 commands is about half a millisecond of processing.
|
||||
static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 2048;
|
||||
static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_SIZE =
|
||||
sizeof(Command) * JOBS_PARALLEL_FOR_COMMANDS_COUNT;
|
||||
@@ -404,15 +386,15 @@ private:
|
||||
static_assert(JOBS_PARALLEL_FOR_COMMANDS_SIZE % utils::CACHELINE_SIZE == 0,
|
||||
"Size of Commands jobs must be multiple of a cache-line size");
|
||||
|
||||
static inline void generateCommands(uint32_t commandTypeFlags, Command* commands,
|
||||
static inline void generateCommands(CommandTypeFlags commandTypeFlags, Command* commands,
|
||||
FScene::RenderableSoa const& soa, utils::Range<uint32_t> range,
|
||||
Variant variant, RenderFlags renderFlags,
|
||||
FScene::VisibleMaskType visibilityMask,
|
||||
math::float3 cameraPosition, math::float3 cameraForward,
|
||||
uint8_t instancedStereoEyeCount) noexcept;
|
||||
|
||||
template<uint32_t commandTypeFlags>
|
||||
static inline Command* generateCommandsImpl(uint32_t extraFlags, Command* curr,
|
||||
template<RenderPass::CommandTypeFlags commandTypeFlags>
|
||||
static inline Command* generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* curr,
|
||||
FScene::RenderableSoa const& soa, utils::Range<uint32_t> range,
|
||||
Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask,
|
||||
math::float3 cameraPosition, math::float3 cameraForward,
|
||||
@@ -424,50 +406,129 @@ private:
|
||||
static void updateSummedPrimitiveCounts(
|
||||
FScene::RenderableSoa& renderableData, utils::Range<uint32_t> vr) noexcept;
|
||||
|
||||
// a reference to the Engine, mostly to get to things like JobSystem
|
||||
|
||||
// Arena where all Commands are allocated. The Arena owns the commands.
|
||||
Arena& mCommandArena;
|
||||
FScene::RenderableSoa const& mRenderableSoa;
|
||||
utils::Range<uint32_t> const mVisibleRenderables;
|
||||
backend::Handle<backend::HwBufferObject> const mUboHandle;
|
||||
math::float3 const mCameraPosition;
|
||||
math::float3 const mCameraForwardVector;
|
||||
RenderFlags const mFlags;
|
||||
Variant const mVariant;
|
||||
FScene::VisibleMaskType const mVisibilityMask;
|
||||
backend::Viewport const mScissorViewport;
|
||||
|
||||
// Pointer to the first command
|
||||
Command* mCommandBegin = nullptr;
|
||||
|
||||
// Pointer to one past the last command
|
||||
Command* mCommandEnd = nullptr;
|
||||
|
||||
// the SOA containing the renderables we're interested in
|
||||
FScene::RenderableSoa const* mRenderableSoa = nullptr;
|
||||
|
||||
// The range of visible renderables in the SOA above
|
||||
utils::Range<uint32_t> mVisibleRenderables{};
|
||||
|
||||
// the UBO containing the data for the renderables
|
||||
backend::Handle<backend::HwBufferObject> mUboHandle;
|
||||
// a UBO for instanced primitives
|
||||
backend::Handle<backend::HwBufferObject> mInstancedUboHandle;
|
||||
|
||||
// info about the camera
|
||||
math::float3 mCameraPosition{};
|
||||
math::float3 mCameraForwardVector{};
|
||||
|
||||
// info about the scene features (e.g.: has shadows, lighting, etc...)
|
||||
RenderFlags mFlags{};
|
||||
|
||||
// Variant to use
|
||||
Variant mVariant{};
|
||||
|
||||
// Additional visibility mask
|
||||
FScene::VisibleMaskType mVisibilityMask = std::numeric_limits<FScene::VisibleMaskType>::max();
|
||||
|
||||
backend::Viewport mScissorViewport{ 0, 0,
|
||||
std::numeric_limits<int32_t>::max(),
|
||||
std::numeric_limits<int32_t>::max() };
|
||||
|
||||
// a vector for our custom commands
|
||||
using CustomCommandVector = std::vector<Executor::CustomCommandFn,
|
||||
utils::STLAllocator<Executor::CustomCommandFn, LinearAllocatorArena>>;
|
||||
mutable CustomCommandVector mCustomCommands;
|
||||
};
|
||||
|
||||
class RenderPassBuilder {
|
||||
friend class RenderPass;
|
||||
|
||||
RenderPass::Arena& mArena;
|
||||
RenderPass::CommandTypeFlags mCommandTypeFlags{};
|
||||
backend::Viewport mScissorViewport{ 0, 0, INT32_MAX, INT32_MAX };
|
||||
FScene::RenderableSoa const* mRenderableSoa = nullptr;
|
||||
utils::Range<uint32_t> mVisibleRenderables{};
|
||||
backend::Handle<backend::HwBufferObject> mUboHandle;
|
||||
math::float3 mCameraPosition{};
|
||||
math::float3 mCameraForwardVector{};
|
||||
RenderPass::RenderFlags mFlags{};
|
||||
Variant mVariant{};
|
||||
FScene::VisibleMaskType mVisibilityMask = std::numeric_limits<FScene::VisibleMaskType>::max();
|
||||
|
||||
using CustomCommandRecord = std::tuple<
|
||||
uint8_t,
|
||||
RenderPass::Pass,
|
||||
RenderPass::CustomCommand,
|
||||
uint32_t,
|
||||
RenderPass::Executor::CustomCommandFn>;
|
||||
|
||||
using CustomCommandContainer = std::vector<CustomCommandRecord,
|
||||
utils::STLAllocator<CustomCommandRecord, LinearAllocatorArena>>;
|
||||
|
||||
// we make this optional because it's not used often, and we don't want to have
|
||||
// to construct it by default.
|
||||
std::optional<CustomCommandContainer> mCustomCommands;
|
||||
|
||||
public:
|
||||
explicit RenderPassBuilder(RenderPass::Arena& arena) : mArena(arena) { }
|
||||
|
||||
RenderPassBuilder& commandTypeFlags(RenderPass::CommandTypeFlags commandTypeFlags) noexcept {
|
||||
mCommandTypeFlags = commandTypeFlags;
|
||||
return *this;
|
||||
}
|
||||
|
||||
RenderPassBuilder& scissorViewport(backend::Viewport viewport) noexcept {
|
||||
mScissorViewport = viewport;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// specifies the geometry to generate commands for
|
||||
RenderPassBuilder& geometry(FScene::RenderableSoa const& soa, utils::Range<uint32_t> vr,
|
||||
backend::Handle<backend::HwBufferObject> uboHandle) noexcept {
|
||||
mRenderableSoa = &soa;
|
||||
mVisibleRenderables = vr;
|
||||
mUboHandle = uboHandle;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Specifies camera information (e.g. used for sorting commands)
|
||||
RenderPassBuilder& camera(const CameraInfo& camera) noexcept {
|
||||
mCameraPosition = camera.getPosition();
|
||||
mCameraForwardVector = camera.getForwardVector();
|
||||
return *this;
|
||||
}
|
||||
|
||||
// flags controlling how commands are generated
|
||||
RenderPassBuilder& renderFlags(RenderPass::RenderFlags flags) noexcept {
|
||||
mFlags = flags;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// like above but allows to set specific flags
|
||||
RenderPassBuilder& renderFlags(
|
||||
RenderPass::RenderFlags mask, RenderPass::RenderFlags value) noexcept {
|
||||
mFlags = (mFlags & mask) | (value & mask);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// variant to use
|
||||
RenderPassBuilder& variant(Variant variant) noexcept {
|
||||
mVariant = variant;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to
|
||||
// determine if the renderable is visible for this pass.
|
||||
// Defaults to all 1's, which means all renderables in this render pass will be rendered.
|
||||
RenderPassBuilder& visibilityMask(FScene::VisibleMaskType mask) noexcept {
|
||||
mVisibilityMask = mask;
|
||||
return *this;
|
||||
}
|
||||
|
||||
RenderPassBuilder& customCommand(FEngine& engine,
|
||||
uint8_t channel,
|
||||
RenderPass::Pass pass,
|
||||
RenderPass::CustomCommand custom,
|
||||
uint32_t order,
|
||||
const RenderPass::Executor::CustomCommandFn& command);
|
||||
|
||||
RenderPass build(FEngine& engine);
|
||||
};
|
||||
|
||||
|
||||
} // namespace filament
|
||||
|
||||
// Specializes utils::EnableBitMaskOperators as std::true_type for RenderPass::CommandTypeFlags;
// presumably this opts the enum into the utils bitmask operators -- confirm in the utils headers.
template<>
struct utils::EnableBitMaskOperators<filament::RenderPass::CommandTypeFlags> : std::true_type {};
|
||||
|
||||
#endif // TNT_FILAMENT_RENDERPASS_H
|
||||
|
||||
@@ -228,10 +228,6 @@ FrameGraphId<FrameGraphTexture> RendererUtils::colorPass(
|
||||
out.params.subpassMask = 1;
|
||||
}
|
||||
|
||||
// this is a good time to flush the CommandStream, because we're about to potentially
|
||||
// output a lot of commands. This guarantees here that we have at least
|
||||
// FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default).
|
||||
engine.flush();
|
||||
driver.beginRenderPass(out.target, out.params);
|
||||
passExecutor.execute(engine, resources.getPassName());
|
||||
driver.endRenderPass();
|
||||
|
||||
@@ -19,14 +19,21 @@
|
||||
#include "RenderPass.h"
|
||||
#include "ShadowMap.h"
|
||||
|
||||
#include "details/DebugRegistry.h"
|
||||
#include "details/Texture.h"
|
||||
#include "details/View.h"
|
||||
|
||||
#include <fg/FrameGraph.h>
|
||||
|
||||
#include <backend/DriverEnums.h>
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/debug.h>
|
||||
#include <utils/FixedCapacityVector.h>
|
||||
|
||||
#include <new>
|
||||
#include <memory>
|
||||
|
||||
namespace filament {
|
||||
|
||||
using namespace backend;
|
||||
@@ -128,7 +135,8 @@ void ShadowMapManager::addShadowMap(size_t lightIndex, bool spotlight,
|
||||
}
|
||||
|
||||
FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameGraph& fg,
|
||||
RenderPass const& pass, FView& view, CameraInfo const& mainCameraInfo,
|
||||
RenderPassBuilder const& passBuilder,
|
||||
FView& view, CameraInfo const& mainCameraInfo,
|
||||
float4 const& userTime) noexcept {
|
||||
|
||||
const float moment2 = std::numeric_limits<half>::max();
|
||||
@@ -206,8 +214,8 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
|
||||
builder.sideEffect();
|
||||
},
|
||||
[this, &engine, &view, vsmShadowOptions,
|
||||
scene, mainCameraInfo, userTime, passTemplate = pass](
|
||||
FrameGraphResources const&, auto const& data, DriverApi& driver) {
|
||||
scene, mainCameraInfo, userTime, passBuilder = passBuilder](
|
||||
FrameGraphResources const&, auto const& data, DriverApi& driver) mutable {
|
||||
|
||||
// Note: we could almost parallel_for the loop below, the problem currently is
|
||||
// that updatePrimitivesLod() updates temporary global state.
|
||||
@@ -262,19 +270,20 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
|
||||
cameraInfo, scene->getRenderableData(), entry.range);
|
||||
|
||||
// generate and sort the commands for rendering the shadow map
|
||||
RenderPass pass(passTemplate);
|
||||
pass.setCamera(cameraInfo);
|
||||
pass.setVisibilityMask(entry.visibilityMask);
|
||||
pass.setGeometry(scene->getRenderableData(),
|
||||
entry.range, scene->getRenderableUBO());
|
||||
pass.appendCommands(engine, RenderPass::SHADOW);
|
||||
pass.sortCommands(engine);
|
||||
|
||||
RenderPass const pass = passBuilder
|
||||
.camera(cameraInfo)
|
||||
.visibilityMask(entry.visibilityMask)
|
||||
.geometry(scene->getRenderableData(),
|
||||
entry.range, scene->getRenderableUBO())
|
||||
.commandTypeFlags(RenderPass::CommandTypeFlags::SHADOW)
|
||||
.build(engine);
|
||||
|
||||
entry.executor = pass.getExecutor();
|
||||
|
||||
if (!view.hasVSM()) {
|
||||
auto const* options = shadowMap.getShadowOptions();
|
||||
const PolygonOffset polygonOffset = { // handle reversed Z
|
||||
PolygonOffset const polygonOffset = { // handle reversed Z
|
||||
.slope = -options->polygonOffsetSlope,
|
||||
.constant = -options->polygonOffsetConstant
|
||||
};
|
||||
@@ -395,7 +404,6 @@ FrameGraphId<FrameGraphTexture> ShadowMapManager::render(FEngine& engine, FrameG
|
||||
|
||||
auto rt = resources.getRenderPassInfo(data.rt);
|
||||
|
||||
engine.flush();
|
||||
driver.beginRenderPass(rt.target, rt.params);
|
||||
entry.shadowMap->bind(driver);
|
||||
entry.executor.overrideScissor(entry.shadowMap->getScissor());
|
||||
|
||||
@@ -43,6 +43,7 @@ namespace filament {
|
||||
class FView;
|
||||
class FrameGraph;
|
||||
class RenderPass;
|
||||
class RenderPassBuilder;
|
||||
|
||||
struct ShadowMappingUniforms {
|
||||
math::float4 cascadeSplits;
|
||||
@@ -86,7 +87,8 @@ public:
|
||||
FScene::RenderableSoa& renderableData, FScene::LightSoa const& lightData) noexcept;
|
||||
|
||||
// Renders all the shadow maps.
|
||||
FrameGraphId<FrameGraphTexture> render(FEngine& engine, FrameGraph& fg, RenderPass const& pass,
|
||||
FrameGraphId<FrameGraphTexture> render(FEngine& engine, FrameGraph& fg,
|
||||
RenderPassBuilder const& passBuilder,
|
||||
FView& view, CameraInfo const& mainCameraInfo, math::float4 const& userTime) noexcept;
|
||||
|
||||
// valid after calling update() above
|
||||
|
||||
@@ -198,7 +198,7 @@ FEngine::FEngine(Engine::Builder const& builder) :
|
||||
mCommandBufferQueue(
|
||||
builder->mConfig.minCommandBufferSizeMB * MiB,
|
||||
builder->mConfig.commandBufferSizeMB * MiB),
|
||||
mPerRenderPassAllocator(
|
||||
mPerRenderPassArena(
|
||||
"FEngine::mPerRenderPassAllocator",
|
||||
builder->mConfig.perRenderPassArenaSizeMB * MiB),
|
||||
mHeapAllocator("FEngine::mHeapAllocator", AreaPolicy::NullArea{}),
|
||||
|
||||
@@ -58,17 +58,6 @@
|
||||
#include <filament/Texture.h>
|
||||
#include <filament/VertexBuffer.h>
|
||||
|
||||
#if FILAMENT_ENABLE_MATDBG
|
||||
#include <matdbg/DebugServer.h>
|
||||
#else
|
||||
namespace filament {
|
||||
namespace matdbg {
|
||||
class DebugServer;
|
||||
using MaterialKey = uint32_t;
|
||||
} // namespace matdbg
|
||||
} // namespace filament
|
||||
#endif
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/Allocator.h>
|
||||
#include <utils/JobSystem.h>
|
||||
@@ -78,8 +67,19 @@ using MaterialKey = uint32_t;
|
||||
#include <memory>
|
||||
#include <new>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#if FILAMENT_ENABLE_MATDBG
|
||||
#include <matdbg/DebugServer.h>
|
||||
#else
|
||||
namespace filament::matdbg {
|
||||
class DebugServer;
|
||||
using MaterialKey = uint32_t;
|
||||
} // namespace filament::matdbg
|
||||
#endif
|
||||
|
||||
namespace filament {
|
||||
|
||||
class Renderer;
|
||||
@@ -142,7 +142,7 @@ public:
|
||||
// the per-frame Area is used by all Renderer, so they must run in sequence and
|
||||
// have freed all allocated memory when done. If this needs to change in the future,
|
||||
// we'll simply have to use separate Areas (for instance).
|
||||
LinearAllocatorArena& getPerRenderPassAllocator() noexcept { return mPerRenderPassAllocator; }
|
||||
LinearAllocatorArena& getPerRenderPassArena() noexcept { return mPerRenderPassArena; }
|
||||
|
||||
// Material IDs...
|
||||
uint32_t getMaterialId() const noexcept { return mMaterialId++; }
|
||||
@@ -508,7 +508,7 @@ private:
|
||||
|
||||
uint32_t mFlushCounter = 0;
|
||||
|
||||
LinearAllocatorArena mPerRenderPassAllocator;
|
||||
RootArenaScope::Arena mPerRenderPassArena;
|
||||
HeapAllocatorArena mHeapAllocator;
|
||||
|
||||
utils::JobSystem mJobSystem;
|
||||
|
||||
@@ -16,6 +16,9 @@
|
||||
|
||||
#include "details/Renderer.h"
|
||||
|
||||
#include "Allocators.h"
|
||||
#include "DebugRegistry.h"
|
||||
#include "FrameHistory.h"
|
||||
#include "PostProcessManager.h"
|
||||
#include "RendererUtils.h"
|
||||
#include "RenderPass.h"
|
||||
@@ -28,21 +31,40 @@
|
||||
#include "details/Texture.h"
|
||||
#include "details/View.h"
|
||||
|
||||
#include <filament/Camera.h>
|
||||
#include <filament/Fence.h>
|
||||
#include <filament/Options.h>
|
||||
#include <filament/Renderer.h>
|
||||
|
||||
#include <backend/DriverEnums.h>
|
||||
#include <backend/DriverApiForward.h>
|
||||
#include <backend/Handle.h>
|
||||
#include <backend/PixelBufferDescriptor.h>
|
||||
|
||||
#include "fg/FrameGraph.h"
|
||||
#include "fg/FrameGraphId.h"
|
||||
#include "fg/FrameGraphResources.h"
|
||||
#include "fg/FrameGraphTexture.h"
|
||||
|
||||
#include <math/vec2.h>
|
||||
#include <math/vec3.h>
|
||||
#include <math/mat4.h>
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/JobSystem.h>
|
||||
#include <utils/Log.h>
|
||||
#include <utils/ostream.h>
|
||||
#include <utils/Panic.h>
|
||||
#include <utils/Systrace.h>
|
||||
#include <utils/vector.h>
|
||||
#include <utils/debug.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// this helps visualize what dynamic-scaling is doing
|
||||
#define DEBUG_DYNAMIC_SCALING false
|
||||
|
||||
@@ -62,8 +84,7 @@ FRenderer::FRenderer(FEngine& engine) :
|
||||
mHdrQualityMedium(TextureFormat::R11F_G11F_B10F),
|
||||
mHdrQualityHigh(TextureFormat::RGB16F),
|
||||
mIsRGB8Supported(false),
|
||||
mUserEpoch(engine.getEngineEpoch()),
|
||||
mPerRenderPassArena(engine.getPerRenderPassAllocator())
|
||||
mUserEpoch(engine.getEngineEpoch())
|
||||
{
|
||||
FDebugRegistry& debugRegistry = engine.getDebugRegistry();
|
||||
debugRegistry.registerProperty("d.renderer.doFrameCapture",
|
||||
@@ -442,7 +463,7 @@ void FRenderer::render(FView const* view) {
|
||||
|
||||
if (UTILS_LIKELY(view && view->getScene())) {
|
||||
if (mViewRenderedCount) {
|
||||
// this is a good place to kick the GPU, since we've rendered a View before,
|
||||
// This is a good place to kick the GPU, since we've rendered a View before,
|
||||
// and we're about to render another one.
|
||||
mEngine.getDriverApi().flush();
|
||||
}
|
||||
@@ -452,17 +473,17 @@ void FRenderer::render(FView const* view) {
|
||||
}
|
||||
|
||||
void FRenderer::renderInternal(FView const* view) {
|
||||
// per-renderpass data
|
||||
ArenaScope rootArena(mPerRenderPassArena);
|
||||
|
||||
FEngine& engine = mEngine;
|
||||
JobSystem& js = engine.getJobSystem();
|
||||
|
||||
// per-renderpass data
|
||||
RootArenaScope rootArenaScope(engine.getPerRenderPassArena());
|
||||
|
||||
// create a root job so no other job can escape
|
||||
JobSystem& js = engine.getJobSystem();
|
||||
auto *rootJob = js.setRootJob(js.createJob());
|
||||
|
||||
// execute the render pass
|
||||
renderJob(rootArena, const_cast<FView&>(*view));
|
||||
renderJob(rootArenaScope, const_cast<FView&>(*view));
|
||||
|
||||
// make sure to flush the command buffer
|
||||
engine.flush();
|
||||
@@ -471,7 +492,7 @@ void FRenderer::renderInternal(FView const* view) {
|
||||
js.runAndWait(rootJob);
|
||||
}
|
||||
|
||||
void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) {
|
||||
FEngine& engine = mEngine;
|
||||
JobSystem& js = engine.getJobSystem();
|
||||
FEngine::DriverApi& driver = engine.getDriverApi();
|
||||
@@ -636,7 +657,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
xvp.bottom = int32_t(guardBand);
|
||||
}
|
||||
|
||||
view.prepare(engine, driver, arena, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel);
|
||||
view.prepare(engine, driver, rootArenaScope, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel);
|
||||
|
||||
view.prepareUpscaler(scale, taaOptions, dsrOptions);
|
||||
|
||||
@@ -649,8 +670,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
// Allocate some space for our commands in the per-frame Arena, and use that space as
|
||||
// an Arena for commands. All this space is released when we exit this method.
|
||||
size_t const perFrameCommandsSize = engine.getPerFrameCommandsSize();
|
||||
void* const arenaBegin = arena.allocate(perFrameCommandsSize, CACHELINE_SIZE);
|
||||
void* const arenaBegin = rootArenaScope.allocate(perFrameCommandsSize, CACHELINE_SIZE);
|
||||
void* const arenaEnd = pointermath::add(arenaBegin, perFrameCommandsSize);
|
||||
|
||||
// This arena *must* stay valid until all commands have been processed
|
||||
RenderPass::Arena commandArena("Command Arena", { arenaBegin, arenaEnd });
|
||||
|
||||
RenderPass::RenderFlags renderFlags = 0;
|
||||
@@ -658,8 +681,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
if (view.isFrontFaceWindingInverted()) renderFlags |= RenderPass::HAS_INVERSE_FRONT_FACES;
|
||||
if (view.hasInstancedStereo()) renderFlags |= RenderPass::IS_STEREOSCOPIC;
|
||||
|
||||
RenderPass pass(engine, commandArena);
|
||||
pass.setRenderFlags(renderFlags);
|
||||
RenderPassBuilder passBuilder(commandArena);
|
||||
passBuilder.renderFlags(renderFlags);
|
||||
|
||||
Variant variant;
|
||||
variant.setDirectionalLighting(view.hasDirectionalLight());
|
||||
@@ -682,10 +705,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
if (view.needsShadowMap()) {
|
||||
Variant shadowVariant(Variant::DEPTH_VARIANT);
|
||||
shadowVariant.setVsm(view.getShadowType() == ShadowType::VSM);
|
||||
|
||||
RenderPass shadowPass(pass);
|
||||
shadowPass.setVariant(shadowVariant);
|
||||
auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime, shadowPass);
|
||||
auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime,
|
||||
RenderPassBuilder{ commandArena }
|
||||
.renderFlags(renderFlags)
|
||||
.variant(shadowVariant));
|
||||
blackboard["shadows"] = shadows;
|
||||
}
|
||||
|
||||
@@ -771,8 +794,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
view.updatePrimitivesLod(engine, cameraInfo,
|
||||
scene.getRenderableData(), view.getVisibleRenderables());
|
||||
|
||||
pass.setCamera(cameraInfo);
|
||||
pass.setGeometry(scene.getRenderableData(), view.getVisibleRenderables(), scene.getRenderableUBO());
|
||||
passBuilder.camera(cameraInfo);
|
||||
passBuilder.geometry(scene.getRenderableData(),
|
||||
view.getVisibleRenderables(), scene.getRenderableUBO());
|
||||
|
||||
// view set-ups that need to happen before rendering
|
||||
fg.addTrivialSideEffectPass("Prepare View Uniforms",
|
||||
@@ -818,7 +842,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
// This is normally used by SSAO and contact-shadows
|
||||
|
||||
// TODO: the scaling should depend on all passes that need the structure pass
|
||||
const auto [structure, picking_] = ppm.structure(fg, pass, renderFlags, svp.width, svp.height, {
|
||||
const auto [structure, picking_] = ppm.structure(fg,
|
||||
passBuilder, renderFlags, svp.width, svp.height, {
|
||||
.scale = aoOptions.resolution,
|
||||
.picking = view.hasPicking()
|
||||
});
|
||||
@@ -876,7 +901,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
// screen-space reflections pass
|
||||
|
||||
if (ssReflectionsOptions.enabled) {
|
||||
auto reflections = ppm.ssr(fg, pass,
|
||||
auto reflections = ppm.ssr(fg, passBuilder,
|
||||
view.getFrameHistory(), cameraInfo,
|
||||
view.getPerViewUniforms(),
|
||||
structure,
|
||||
@@ -894,10 +919,15 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
// --------------------------------------------------------------------------------------------
|
||||
// Color passes
|
||||
|
||||
// this makes the viewport relative to xvp
|
||||
// FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to
|
||||
// know at this point. This will usually be the case when post-process is disabled.
|
||||
// FIXME: we probably should take the dynamic scaling into account too
|
||||
passBuilder.scissorViewport(hasPostProcess ? xvp : vp);
|
||||
|
||||
// This one doesn't need to be a FrameGraph pass because it always happens by construction
|
||||
// (i.e. it won't be culled, unless everything is culled), so no need to complexify things.
|
||||
pass.setVariant(variant);
|
||||
pass.appendCommands(engine, RenderPass::COLOR);
|
||||
passBuilder.variant(variant);
|
||||
|
||||
// color-grading as subpass is done either by the color pass or the TAA pass if any
|
||||
auto colorGradingConfigForColor = colorGradingConfig;
|
||||
@@ -905,7 +935,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
|
||||
if (colorGradingConfigForColor.asSubpass) {
|
||||
// append color grading subpass after all other passes
|
||||
pass.appendCustomCommand(3,
|
||||
passBuilder.customCommand(engine, 3,
|
||||
RenderPass::Pass::BLENDED,
|
||||
RenderPass::CustomCommand::EPILOG,
|
||||
0, [&ppm, &driver, colorGradingConfigForColor]() {
|
||||
@@ -913,7 +943,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
});
|
||||
} else if (colorGradingConfig.customResolve) {
|
||||
// append custom resolve subpass after all other passes
|
||||
pass.appendCustomCommand(3,
|
||||
passBuilder.customCommand(engine, 3,
|
||||
RenderPass::Pass::BLENDED,
|
||||
RenderPass::CustomCommand::EPILOG,
|
||||
0, [&ppm, &driver]() {
|
||||
@@ -921,16 +951,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) {
|
||||
});
|
||||
}
|
||||
|
||||
// sort commands once we're done adding commands
|
||||
pass.sortCommands(engine);
|
||||
|
||||
|
||||
// this makes the viewport relative to xvp
|
||||
// FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to
|
||||
// know at this point. This will usually be the case when post-process is disabled.
|
||||
// FIXME: we probably should take the dynamic scaling into account too
|
||||
pass.setScissorViewport(hasPostProcess ? xvp : vp);
|
||||
passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::COLOR);
|
||||
|
||||
RenderPass const pass{ passBuilder.build(engine) };
|
||||
|
||||
FrameGraphTexture::Descriptor const desc = {
|
||||
.width = config.physicalViewport.width,
|
||||
|
||||
@@ -163,7 +163,7 @@ private:
|
||||
}
|
||||
|
||||
void renderInternal(FView const* view);
|
||||
void renderJob(ArenaScope& arena, FView& view);
|
||||
void renderJob(RootArenaScope& rootArenaScope, FView& view);
|
||||
|
||||
// keep a reference to our engine
|
||||
FEngine& mEngine;
|
||||
@@ -187,9 +187,6 @@ private:
|
||||
backend::TargetBufferFlags mClearFlags{};
|
||||
tsl::robin_set<FRenderTarget*> mPreviousRenderTargets;
|
||||
std::function<void()> mBeginFrameInternal;
|
||||
|
||||
// per-frame arena for this Renderer
|
||||
LinearAllocatorArena& mPerRenderPassArena;
|
||||
};
|
||||
|
||||
FILAMENT_DOWNCAST(Renderer)
|
||||
|
||||
@@ -53,7 +53,7 @@ FScene::~FScene() noexcept = default;
|
||||
|
||||
|
||||
void FScene::prepare(utils::JobSystem& js,
|
||||
LinearAllocatorArena& allocator,
|
||||
RootArenaScope& rootArenaScope,
|
||||
mat4 const& worldTransform,
|
||||
bool shadowReceiversAreCasters) noexcept {
|
||||
// TODO: can we skip this in most cases? Since we rely on indices staying the same,
|
||||
@@ -64,7 +64,7 @@ void FScene::prepare(utils::JobSystem& js,
|
||||
SYSTRACE_CONTEXT();
|
||||
|
||||
// This will reset the allocator upon exiting
|
||||
ArenaScope const arena(allocator);
|
||||
ArenaScope<RootArenaScope::Arena> localArenaScope(rootArenaScope.getArena());
|
||||
|
||||
FEngine& engine = mEngine;
|
||||
EntityManager const& em = engine.getEntityManager();
|
||||
@@ -85,10 +85,10 @@ void FScene::prepare(utils::JobSystem& js,
|
||||
utils::STLAllocator< LightContainerData, LinearAllocatorArena >, false>;
|
||||
|
||||
RenderableInstanceContainer renderableInstances{
|
||||
RenderableInstanceContainer::with_capacity(entities.size(), allocator) };
|
||||
RenderableInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) };
|
||||
|
||||
LightInstanceContainer lightInstances{
|
||||
LightInstanceContainer::with_capacity(entities.size(), allocator) };
|
||||
LightInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) };
|
||||
|
||||
SYSTRACE_NAME_BEGIN("InstanceLoop");
|
||||
|
||||
@@ -454,7 +454,7 @@ void FScene::terminate(FEngine&) {
|
||||
mRenderableViewUbh.clear();
|
||||
}
|
||||
|
||||
void FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope&,
|
||||
void FScene::prepareDynamicLights(const CameraInfo& camera,
|
||||
Handle<HwBufferObject> lightUbh) noexcept {
|
||||
FEngine::DriverApi& driver = mEngine.getDriverApi();
|
||||
FLightManager const& lcm = mEngine.getLightManager();
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include <filament/Box.h>
|
||||
#include <filament/Scene.h>
|
||||
|
||||
#include <math/mathfwd.h>
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/Entity.h>
|
||||
#include <utils/Slice.h>
|
||||
@@ -70,12 +72,12 @@ public:
|
||||
~FScene() noexcept;
|
||||
void terminate(FEngine& engine);
|
||||
|
||||
void prepare(utils::JobSystem& js, LinearAllocatorArena& allocator,
|
||||
void prepare(utils::JobSystem& js, RootArenaScope& rootArenaScope,
|
||||
math::mat4 const& worldTransform, bool shadowReceiversAreCasters) noexcept;
|
||||
|
||||
void prepareVisibleRenderables(utils::Range<uint32_t> visibleRenderables) noexcept;
|
||||
|
||||
void prepareDynamicLights(const CameraInfo& camera, ArenaScope& arena,
|
||||
void prepareDynamicLights(const CameraInfo& camera,
|
||||
backend::Handle<backend::HwBufferObject> lightUbh) noexcept;
|
||||
|
||||
backend::Handle<backend::HwBufferObject> getRenderableUBO() const noexcept {
|
||||
|
||||
@@ -341,8 +341,7 @@ void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableD
|
||||
mNeedsShadowMap = any(shadowTechnique & ShadowMapManager::ShadowTechnique::SHADOW_MAP);
|
||||
}
|
||||
|
||||
void FView::prepareLighting(FEngine& engine, ArenaScope& arena,
|
||||
CameraInfo const& cameraInfo) noexcept {
|
||||
void FView::prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept {
|
||||
SYSTRACE_CALL();
|
||||
SYSTRACE_CONTEXT();
|
||||
|
||||
@@ -354,7 +353,7 @@ void FView::prepareLighting(FEngine& engine, ArenaScope& arena,
|
||||
*/
|
||||
|
||||
if (hasDynamicLighting()) {
|
||||
scene->prepareDynamicLights(cameraInfo, arena, mLightUbh);
|
||||
scene->prepareDynamicLights(cameraInfo, mLightUbh);
|
||||
}
|
||||
|
||||
// here the array of visible lights has been shrunk to CONFIG_MAX_LIGHT_COUNT
|
||||
@@ -427,7 +426,7 @@ CameraInfo FView::computeCameraInfo(FEngine& engine) const noexcept {
|
||||
return { *camera, mat4{ rotation } * mat4::translation(translation) };
|
||||
}
|
||||
|
||||
void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
|
||||
void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootArenaScope,
|
||||
filament::Viewport viewport, CameraInfo cameraInfo,
|
||||
float4 const& userTime, bool needsAlphaChannel) noexcept {
|
||||
|
||||
@@ -465,7 +464,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
|
||||
* Gather all information needed to render this scene. Apply the world origin to all
|
||||
* objects in the scene.
|
||||
*/
|
||||
scene->prepare(js, arena.getAllocator(),
|
||||
scene->prepare(js, rootArenaScope,
|
||||
cameraInfo.worldTransform,
|
||||
hasVSM());
|
||||
|
||||
@@ -475,14 +474,22 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
|
||||
|
||||
JobSystem::Job* froxelizeLightsJob = nullptr;
|
||||
JobSystem::Job* prepareVisibleLightsJob = nullptr;
|
||||
if (scene->getLightData().size() > FScene::DIRECTIONAL_LIGHTS_COUNT) {
|
||||
size_t const lightCount = scene->getLightData().size();
|
||||
if (lightCount > FScene::DIRECTIONAL_LIGHTS_COUNT) {
|
||||
// create and start the prepareVisibleLights job
|
||||
// note: this job updates LightData (non const)
|
||||
// allocate a scratch buffer for distances outside the job below, so we don't need
|
||||
// to use a locked allocator; the downside is that we need to account for the worst case.
|
||||
size_t const positionalLightCount = lightCount - FScene::DIRECTIONAL_LIGHTS_COUNT;
|
||||
float* const distances = rootArenaScope.allocate<float>(
|
||||
(positionalLightCount + 3u) & ~3u, CACHELINE_SIZE);
|
||||
|
||||
prepareVisibleLightsJob = js.runAndRetain(js.createJob(nullptr,
|
||||
[&engine, &arena, &viewMatrix = cameraInfo.view, &cullingFrustum,
|
||||
[&engine, distances, positionalLightCount, &viewMatrix = cameraInfo.view, &cullingFrustum,
|
||||
&lightData = scene->getLightData()]
|
||||
(JobSystem&, JobSystem::Job*) {
|
||||
FView::prepareVisibleLights(engine.getLightManager(), arena,
|
||||
FView::prepareVisibleLights(engine.getLightManager(),
|
||||
{ distances, distances + positionalLightCount },
|
||||
viewMatrix, cullingFrustum, lightData);
|
||||
}));
|
||||
}
|
||||
@@ -530,7 +537,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
|
||||
// As soon as prepareVisibleLight finishes, we can kick-off the froxelization
|
||||
if (hasDynamicLighting()) {
|
||||
auto& froxelizer = mFroxelizer;
|
||||
if (froxelizer.prepare(driver, arena, viewport,
|
||||
if (froxelizer.prepare(driver, rootArenaScope, viewport,
|
||||
cameraInfo.projection, cameraInfo.zn, cameraInfo.zf)) {
|
||||
// TODO: might be more consistent to do this in prepareLighting(), but it's not
|
||||
// strictly necessary
|
||||
@@ -645,7 +652,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena,
|
||||
* Relies on FScene::prepare() and prepareVisibleLights()
|
||||
*/
|
||||
|
||||
prepareLighting(engine, arena, cameraInfo);
|
||||
prepareLighting(engine, cameraInfo);
|
||||
|
||||
/*
|
||||
* Update driver state
|
||||
@@ -850,7 +857,8 @@ void FView::cullRenderables(JobSystem&,
|
||||
functor(0, renderableData.size());
|
||||
}
|
||||
|
||||
void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena,
|
||||
void FView::prepareVisibleLights(FLightManager const& lcm,
|
||||
utils::Slice<float> scratch,
|
||||
mat4f const& viewMatrix, Frustum const& frustum,
|
||||
FScene::LightSoa& lightData) noexcept {
|
||||
SYSTRACE_CALL();
|
||||
@@ -918,28 +926,25 @@ void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena
|
||||
* - This helps our limited numbers of spot-shadow as well.
|
||||
*/
|
||||
|
||||
ArenaScope arena(rootArena.getAllocator());
|
||||
size_t const size = visibleLightCount;
|
||||
// number of point/spotlights
|
||||
size_t const positionalLightCount = size - FScene::DIRECTIONAL_LIGHTS_COUNT;
|
||||
size_t const positionalLightCount = visibleLightCount - FScene::DIRECTIONAL_LIGHTS_COUNT;
|
||||
if (positionalLightCount) {
|
||||
// always allocate at least 4 entries, because the vectorized loops below rely on that
|
||||
float* const UTILS_RESTRICT distances =
|
||||
arena.allocate<float>((size + 3u) & ~3u, CACHELINE_SIZE);
|
||||
|
||||
assert_invariant(positionalLightCount <= scratch.size());
|
||||
// pre-compute the lights' distance to the camera, for sorting below
|
||||
// - we don't skip the directional light, because we don't care, it's ignored during sorting
|
||||
float* const UTILS_RESTRICT distances = scratch.data();
|
||||
float4 const* const UTILS_RESTRICT spheres = lightData.data<FScene::POSITION_RADIUS>();
|
||||
computeLightCameraDistances(distances, viewMatrix, spheres, size);
|
||||
computeLightCameraDistances(distances, viewMatrix, spheres, visibleLightCount);
|
||||
|
||||
// skip directional light
|
||||
Zip2Iterator<FScene::LightSoa::iterator, float*> b = { lightData.begin(), distances };
|
||||
std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + size,
|
||||
std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + visibleLightCount,
|
||||
[](auto const& lhs, auto const& rhs) { return lhs.second < rhs.second; });
|
||||
}
|
||||
|
||||
// drop excess lights
|
||||
lightData.resize(std::min(size, CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT));
|
||||
lightData.resize(std::min(visibleLightCount,
|
||||
CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT));
|
||||
}
|
||||
|
||||
// These methods need to exist so clang honors the __restrict__ keyword, which in turn
|
||||
@@ -972,8 +977,9 @@ void FView::updatePrimitivesLod(FEngine& engine, const CameraInfo&,
|
||||
}
|
||||
|
||||
FrameGraphId<FrameGraphTexture> FView::renderShadowMaps(FEngine& engine, FrameGraph& fg,
|
||||
CameraInfo const& cameraInfo, float4 const& userTime, RenderPass const& pass) noexcept {
|
||||
return mShadowMapManager.render(engine, fg, pass, *this, cameraInfo, userTime);
|
||||
CameraInfo const& cameraInfo, float4 const& userTime,
|
||||
RenderPassBuilder const& passBuilder) noexcept {
|
||||
return mShadowMapManager.render(engine, fg, passBuilder, *this, cameraInfo, userTime);
|
||||
}
|
||||
|
||||
void FView::commitFrameHistory(FEngine& engine) noexcept {
|
||||
|
||||
@@ -88,7 +88,7 @@ public:
|
||||
|
||||
// note: viewport/cameraInfo are passed by value to make it clear that prepare cannot
|
||||
// keep references on them that would outlive the scope of prepare() (e.g. with JobSystem).
|
||||
void prepare(FEngine& engine, backend::DriverApi& driver, ArenaScope& arena,
|
||||
void prepare(FEngine& engine, backend::DriverApi& driver, RootArenaScope& rootArenaScope,
|
||||
filament::Viewport viewport, CameraInfo cameraInfo,
|
||||
math::float4 const& userTime, bool needsAlphaChannel) noexcept;
|
||||
|
||||
@@ -144,7 +144,7 @@ public:
|
||||
|
||||
void prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableData,
|
||||
FScene::LightSoa const& lightData, CameraInfo const& cameraInfo) noexcept;
|
||||
void prepareLighting(FEngine& engine, ArenaScope& arena, CameraInfo const& cameraInfo) noexcept;
|
||||
void prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept;
|
||||
|
||||
void prepareSSAO(backend::Handle<backend::HwTexture> ssao) const noexcept;
|
||||
void prepareSSR(backend::Handle<backend::HwTexture> ssr, bool disableSSR,
|
||||
@@ -176,7 +176,7 @@ public:
|
||||
|
||||
FrameGraphId<FrameGraphTexture> renderShadowMaps(FEngine& engine, FrameGraph& fg,
|
||||
CameraInfo const& cameraInfo, math::float4 const& userTime,
|
||||
RenderPass const& pass) noexcept;
|
||||
RenderPassBuilder const& passBuilder) noexcept;
|
||||
|
||||
void updatePrimitivesLod(
|
||||
FEngine& engine, const CameraInfo& camera,
|
||||
@@ -460,7 +460,8 @@ private:
|
||||
void prepareVisibleRenderables(utils::JobSystem& js,
|
||||
Frustum const& frustum, FScene::RenderableSoa& renderableData) const noexcept;
|
||||
|
||||
static void prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena,
|
||||
static void prepareVisibleLights(FLightManager const& lcm,
|
||||
utils::Slice<float> scratch,
|
||||
math::mat4f const& viewMatrix, Frustum const& frustum,
|
||||
FScene::LightSoa& lightData) noexcept;
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
namespace utils {
|
||||
|
||||
@@ -43,14 +44,14 @@ static inline P* add(P* a, T b) noexcept {
|
||||
template <typename P>
|
||||
static inline P* align(P* p, size_t alignment) noexcept {
|
||||
// alignment must be a power-of-two
|
||||
assert(alignment && !(alignment & alignment-1));
|
||||
assert_invariant(alignment && !(alignment & alignment-1));
|
||||
return (P*)((uintptr_t(p) + alignment - 1) & ~(alignment - 1));
|
||||
}
|
||||
|
||||
template <typename P>
|
||||
static inline P* align(P* p, size_t alignment, size_t offset) noexcept {
|
||||
P* const r = align(add(p, offset), alignment);
|
||||
assert(r >= add(p, offset));
|
||||
assert_invariant(r >= add(p, offset));
|
||||
return r;
|
||||
}
|
||||
|
||||
@@ -89,20 +90,19 @@ public:
|
||||
// branch-less allocation
|
||||
void* const p = pointermath::align(current(), alignment, extra);
|
||||
void* const c = pointermath::add(p, size);
|
||||
bool success = c <= end();
|
||||
bool const success = c <= end();
|
||||
set_current(success ? c : current());
|
||||
return success ? p : nullptr;
|
||||
}
|
||||
|
||||
// API specific to this allocator
|
||||
|
||||
void *getCurrent() UTILS_RESTRICT noexcept {
|
||||
return current();
|
||||
}
|
||||
|
||||
// free memory back to the specified point
|
||||
void rewind(void* p) UTILS_RESTRICT noexcept {
|
||||
assert(p>=mBegin && p<end());
|
||||
assert_invariant(p >= mBegin && p < end());
|
||||
set_current(p);
|
||||
}
|
||||
|
||||
@@ -122,16 +122,21 @@ public:
|
||||
void swap(LinearAllocator& rhs) noexcept;
|
||||
|
||||
void *base() noexcept { return mBegin; }
|
||||
void const *base() const noexcept { return mBegin; }
|
||||
|
||||
void free(void*, size_t) UTILS_RESTRICT noexcept { }
|
||||
|
||||
private:
|
||||
protected:
|
||||
void* end() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); }
|
||||
void const* end() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); }
|
||||
|
||||
void* current() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); }
|
||||
void const* current() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); }
|
||||
|
||||
private:
|
||||
// Stores the allocator's cursor as a 32-bit byte offset of `p` from mBegin.
void set_current(void* p) UTILS_RESTRICT noexcept {
    const uintptr_t base = uintptr_t(mBegin);
    const uintptr_t target = uintptr_t(p);
    mCur = static_cast<uint32_t>(target - base);
}
|
||||
|
||||
void* mBegin = nullptr;
|
||||
uint32_t mSize = 0;
|
||||
uint32_t mCur = 0;
|
||||
@@ -152,9 +157,7 @@ public:
|
||||
explicit HeapAllocator(const AREA&) { }
|
||||
|
||||
// our allocator concept
|
||||
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) {
|
||||
// this allocator doesn't support 'extra'
|
||||
assert(extra == 0);
|
||||
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) {
|
||||
return aligned_alloc(size, alignment);
|
||||
}
|
||||
|
||||
@@ -171,6 +174,50 @@ public:
|
||||
void swap(HeapAllocator&) noexcept { }
|
||||
};
|
||||
|
||||
/* ------------------------------------------------------------------------------------------------
|
||||
* LinearAllocatorWithFallback
|
||||
*
|
||||
* This is a LinearAllocator that falls back to a HeapAllocator when an allocation fails. The Heap
|
||||
* allocator memory is freed only when the LinearAllocator is reset or destroyed.
|
||||
* ------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
class LinearAllocatorWithFallback : private LinearAllocator, private HeapAllocator {
|
||||
std::vector<void*> mHeapAllocations;
|
||||
public:
|
||||
LinearAllocatorWithFallback(void* begin, void* end) noexcept
|
||||
: LinearAllocator(begin, end) {
|
||||
}
|
||||
|
||||
template <typename AREA>
|
||||
explicit LinearAllocatorWithFallback(const AREA& area)
|
||||
: LinearAllocatorWithFallback(area.begin(), area.end()) {
|
||||
}
|
||||
|
||||
~LinearAllocatorWithFallback() noexcept {
|
||||
LinearAllocatorWithFallback::reset();
|
||||
}
|
||||
|
||||
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t));
|
||||
|
||||
void *getCurrent() noexcept {
|
||||
return LinearAllocator::getCurrent();
|
||||
}
|
||||
|
||||
void rewind(void* p) noexcept {
|
||||
if (p >= LinearAllocator::base() && p < LinearAllocator::end()) {
|
||||
LinearAllocator::rewind(p);
|
||||
}
|
||||
}
|
||||
|
||||
void reset() noexcept;
|
||||
|
||||
void free(void*, size_t) noexcept { }
|
||||
|
||||
bool isHeapAllocation(void* p) const noexcept {
|
||||
return p < LinearAllocator::base() || p >= LinearAllocator::end();
|
||||
}
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
class FreeList {
|
||||
@@ -186,13 +233,13 @@ public:
|
||||
Node* const head = mHead;
|
||||
mHead = head ? head->next : nullptr;
|
||||
// this could indicate a use after free
|
||||
assert(!mHead || mHead >= mBegin && mHead < mEnd);
|
||||
assert_invariant(!mHead || mHead >= mBegin && mHead < mEnd);
|
||||
return head;
|
||||
}
|
||||
|
||||
void push(void* p) noexcept {
|
||||
assert(p);
|
||||
assert(p >= mBegin && p < mEnd);
|
||||
assert_invariant(p);
|
||||
assert_invariant(p >= mBegin && p < mEnd);
|
||||
// TODO: assert this is one of our pointers (i.e.: its address matches one of ours)
|
||||
Node* const head = static_cast<Node*>(p);
|
||||
head->next = mHead;
|
||||
@@ -229,7 +276,7 @@ public:
|
||||
AtomicFreeList& operator=(const FreeList& rhs) = delete;
|
||||
|
||||
void* pop() noexcept {
|
||||
Node* const storage = mStorage;
|
||||
Node* const pStorage = mStorage;
|
||||
|
||||
HeadPtr currentHead = mHead.load();
|
||||
while (currentHead.offset >= 0) {
|
||||
@@ -237,8 +284,8 @@ public:
|
||||
// thread raced ahead of us. But in that case, the computed "newHead" will be discarded
|
||||
// since compare_exchange_weak fails. Then this thread will loop with the updated
|
||||
// value of currentHead, and try again.
|
||||
Node* const next = storage[currentHead.offset].next.load(std::memory_order_relaxed);
|
||||
const HeadPtr newHead{ next ? int32_t(next - storage) : -1, currentHead.tag + 1 };
|
||||
Node* const pNext = pStorage[currentHead.offset].next.load(std::memory_order_relaxed);
|
||||
const HeadPtr newHead{ pNext ? int32_t(pNext - pStorage) : -1, currentHead.tag + 1 };
|
||||
// In the rare case that the other thread that raced ahead of us already returned the
|
||||
// same mHead we just loaded, but it now has a different "next" value, the tag field will not
|
||||
// match, and compare_exchange_weak will fail and prevent that particular race condition.
|
||||
@@ -246,18 +293,18 @@ public:
|
||||
// This assert needs to occur after we have validated that there was no race condition
|
||||
// Otherwise, next might already contain application data, if another thread
|
||||
// raced ahead of us after we loaded mHead, but before we loaded mHead->next.
|
||||
assert(!next || next >= storage);
|
||||
assert_invariant(!pNext || pNext >= pStorage);
|
||||
break;
|
||||
}
|
||||
}
|
||||
void* p = (currentHead.offset >= 0) ? (storage + currentHead.offset) : nullptr;
|
||||
assert(!p || p >= storage);
|
||||
void* p = (currentHead.offset >= 0) ? (pStorage + currentHead.offset) : nullptr;
|
||||
assert_invariant(!p || p >= pStorage);
|
||||
return p;
|
||||
}
|
||||
|
||||
void push(void* p) noexcept {
|
||||
Node* const storage = mStorage;
|
||||
assert(p && p >= storage);
|
||||
assert_invariant(p && p >= storage);
|
||||
Node* const node = static_cast<Node*>(p);
|
||||
HeadPtr currentHead = mHead.load();
|
||||
HeadPtr newHead = { int32_t(node - storage), currentHead.tag + 1 };
|
||||
@@ -330,9 +377,9 @@ public:
|
||||
// our allocator concept
|
||||
void* alloc(size_t size = ELEMENT_SIZE,
|
||||
size_t alignment = ALIGNMENT, size_t offset = OFFSET) noexcept {
|
||||
assert(size <= ELEMENT_SIZE);
|
||||
assert(alignment <= ALIGNMENT);
|
||||
assert(offset == OFFSET);
|
||||
assert_invariant(size <= ELEMENT_SIZE);
|
||||
assert_invariant(alignment <= ALIGNMENT);
|
||||
assert_invariant(offset == OFFSET);
|
||||
return mFreeList.pop();
|
||||
}
|
||||
|
||||
@@ -587,23 +634,36 @@ public:
|
||||
|
||||
// allocate memory from arena with given size and alignment
|
||||
// (acceptable size/alignment may depend on the allocator provided)
|
||||
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) noexcept {
|
||||
void* alloc(size_t size, size_t alignment, size_t extra) noexcept {
|
||||
std::lock_guard<LockingPolicy> guard(mLock);
|
||||
void* p = mAllocator.alloc(size, alignment, extra);
|
||||
mListener.onAlloc(p, size, alignment, extra);
|
||||
return p;
|
||||
}
|
||||
|
||||
void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) noexcept {
|
||||
std::lock_guard<LockingPolicy> guard(mLock);
|
||||
void* p = mAllocator.alloc(size, alignment);
|
||||
mListener.onAlloc(p, size, alignment, 0);
|
||||
return p;
|
||||
}
|
||||
|
||||
// Allocate an array of trivially destructible objects
|
||||
// for safety, we disable the object-based alloc method if the object type is not
|
||||
// trivially destructible, since free() won't call the destructor and this is allocating
|
||||
// an array.
|
||||
template <typename T,
|
||||
typename = typename std::enable_if<std::is_trivially_destructible<T>::value>::type>
|
||||
T* alloc(size_t count, size_t alignment = alignof(T), size_t extra = 0) noexcept {
|
||||
T* alloc(size_t count, size_t alignment, size_t extra) noexcept {
|
||||
return (T*)alloc(count * sizeof(T), alignment, extra);
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename = typename std::enable_if<std::is_trivially_destructible<T>::value>::type>
|
||||
T* alloc(size_t count, size_t alignment = alignof(T)) noexcept {
|
||||
return (T*)alloc(count * sizeof(T), alignment);
|
||||
}
|
||||
|
||||
// return memory pointed by p to the arena
|
||||
// (actual behaviour may depend on allocator provided)
|
||||
void free(void* p) noexcept {
|
||||
@@ -720,6 +780,8 @@ class ArenaScope {
|
||||
}
|
||||
|
||||
public:
|
||||
using Arena = ARENA;
|
||||
|
||||
explicit ArenaScope(ARENA& allocator)
|
||||
: mArena(allocator), mRewind(allocator.getCurrent()) {
|
||||
}
|
||||
@@ -771,7 +833,7 @@ public:
|
||||
}
|
||||
|
||||
// use with caution
|
||||
ARENA& getAllocator() noexcept { return mArena; }
|
||||
ARENA& getArena() noexcept { return mArena; }
|
||||
|
||||
private:
|
||||
ARENA& mArena;
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
#include <utils/Allocator.h>
|
||||
|
||||
#include <utils/compiler.h>
|
||||
#include <utils/debug.h>
|
||||
#include <utils/Log.h>
|
||||
|
||||
#include <algorithm>
|
||||
@@ -52,6 +54,29 @@ void LinearAllocator::swap(LinearAllocator& rhs) noexcept {
|
||||
std::swap(mCur, rhs.mCur);
|
||||
}
|
||||
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// LinearAllocatorWithFallback
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
void* LinearAllocatorWithFallback::alloc(size_t size, size_t alignment) {
|
||||
void* p = LinearAllocator::alloc(size, alignment);
|
||||
if (UTILS_UNLIKELY(!p)) {
|
||||
p = HeapAllocator::alloc(size, alignment);
|
||||
mHeapAllocations.push_back(p);
|
||||
}
|
||||
assert_invariant(p);
|
||||
return p;
|
||||
}
|
||||
|
||||
void LinearAllocatorWithFallback::reset() noexcept {
|
||||
LinearAllocator::reset();
|
||||
for (auto* p : mHeapAllocations) {
|
||||
HeapAllocator::free(p);
|
||||
}
|
||||
mHeapAllocations.clear();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// FreeList
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
@@ -61,8 +86,8 @@ FreeList::Node* FreeList::init(void* begin, void* end,
|
||||
{
|
||||
void* const p = pointermath::align(begin, alignment, extra);
|
||||
void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra);
|
||||
assert(p >= begin && p < end);
|
||||
assert(n >= begin && n < end && n > p);
|
||||
assert_invariant(p >= begin && p < end);
|
||||
assert_invariant(n >= begin && n < end && n > p);
|
||||
|
||||
const size_t d = uintptr_t(n) - uintptr_t(p);
|
||||
const size_t num = (uintptr_t(end) - uintptr_t(p)) / d;
|
||||
@@ -77,8 +102,8 @@ FreeList::Node* FreeList::init(void* begin, void* end,
|
||||
cur->next = next;
|
||||
cur = next;
|
||||
}
|
||||
assert(cur < end);
|
||||
assert(pointermath::add(cur, d) <= end);
|
||||
assert_invariant(cur < end);
|
||||
assert_invariant(pointermath::add(cur, d) <= end);
|
||||
cur->next = nullptr;
|
||||
return head;
|
||||
}
|
||||
@@ -97,13 +122,13 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end,
|
||||
{
|
||||
#ifdef __ANDROID__
|
||||
// On some platforms (e.g. web) this returns false. We really only care about mobile though.
|
||||
assert(mHead.is_lock_free());
|
||||
assert_invariant(mHead.is_lock_free());
|
||||
#endif
|
||||
|
||||
void* const p = pointermath::align(begin, alignment, extra);
|
||||
void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra);
|
||||
assert(p >= begin && p < end);
|
||||
assert(n >= begin && n < end && n > p);
|
||||
assert_invariant(p >= begin && p < end);
|
||||
assert_invariant(n >= begin && n < end && n > p);
|
||||
|
||||
const size_t d = uintptr_t(n) - uintptr_t(p);
|
||||
const size_t num = (uintptr_t(end) - uintptr_t(p)) / d;
|
||||
@@ -119,8 +144,8 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end,
|
||||
cur->next = next;
|
||||
cur = next;
|
||||
}
|
||||
assert(cur < end);
|
||||
assert(pointermath::add(cur, d) <= end);
|
||||
assert_invariant(cur < end);
|
||||
assert_invariant(pointermath::add(cur, d) <= end);
|
||||
cur->next = nullptr;
|
||||
|
||||
mHead.store({ int32_t(head - mStorage), 0 });
|
||||
@@ -148,22 +173,25 @@ TrackingPolicy::HighWatermark::~HighWatermark() noexcept {
|
||||
}
|
||||
|
||||
void TrackingPolicy::HighWatermark::onFree(void* p, size_t size) noexcept {
|
||||
assert(mCurrent >= size);
|
||||
// FIXME: this code is incorrect with LinearAllocators because free() is a no-op for them
|
||||
assert_invariant(mCurrent >= size);
|
||||
mCurrent -= uint32_t(size);
|
||||
}
|
||||
void TrackingPolicy::HighWatermark::onReset() noexcept {
|
||||
// we should never be here if mBase is nullptr because compilation would have failed when
|
||||
// Arena::onReset() tries to call the underlying allocator's onReset()
|
||||
assert(mBase);
|
||||
assert_invariant(mBase);
|
||||
mCurrent = 0;
|
||||
}
|
||||
|
||||
void TrackingPolicy::HighWatermark::onRewind(void const* addr) noexcept {
|
||||
// we should never be here if mBase is nullptr because compilation would have failed when
|
||||
// Arena::onRewind() tries to call the underlying allocator's onReset()
|
||||
assert(mBase);
|
||||
assert(addr >= mBase);
|
||||
mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase));
|
||||
assert_invariant(mBase);
|
||||
// for LinearAllocatorWithFallback we could get pointers outside the range
|
||||
if (addr >= mBase && addr < pointermath::add(mBase, mSize)) {
|
||||
mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase));
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
@@ -183,7 +211,7 @@ void TrackingPolicy::Debug::onFree(void* p, size_t size) noexcept {
|
||||
void TrackingPolicy::Debug::onReset() noexcept {
|
||||
// we should never be here if mBase is nullptr because compilation would have failed when
|
||||
// Arena::onReset() tries to call the underlying allocator's onReset()
|
||||
assert(mBase);
|
||||
assert_invariant(mBase);
|
||||
memset(mBase, 0xec, mSize);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user