Compare commits

..

1 Commits

Author SHA1 Message Date
Powei Feng
ce4199b5f7 Testing rendering to quad 2024-08-27 13:47:43 -07:00
323 changed files with 8750 additions and 15254 deletions

View File

@@ -1,17 +0,0 @@
name: 'Android Continuous'
inputs:
build-abi:
description: 'The target platform ABI'
required: true
default: 'armeabi-v7a'
runs:
using: "composite"
steps:
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '17'
- name: Run build script
run: |
cd build/android && printf "y" | ./build.sh continuous ${{ inputs.build-abi }}
shell: bash

View File

@@ -1,9 +0,0 @@
name: 'ubuntu apt add deb-src'
runs:
using: "composite"
steps:
- name: "ubuntu apt add deb-src"
run: |
echo "deb-src http://archive.ubuntu.com/ubuntu jammy main restricted universe" | sudo tee /etc/apt/sources.list.d/my.list
sudo apt-get update
shell: bash

View File

@@ -10,13 +10,30 @@ on:
jobs:
build-android:
name: build-android
# We intentionally use a larger runner here for more disk space and faster build times
# (the standard Linux runner runs out of disk space).
runs-on: ubuntu-22.04-32core
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
- name: Run Android Continuous
uses: ./.github/actions/android-continuous
- uses: actions/setup-java@v3
with:
build-abi: armeabi-v7a,arm64-v8a,x86_64
distribution: 'temurin'
java-version: '17'
- name: Run build script
run: |
cd build/android && printf "y" | ./build.sh continuous
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-android
path: out/filament-android-release.aar
- uses: actions/upload-artifact@v1.0.0
with:
name: filamat-android-full
path: out/filamat-android-release.aar
- uses: actions/upload-artifact@v1.0.0
with:
name: gltfio-android-release
path: out/gltfio-android-release.aar
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-utils-android-release
path: out/filament-utils-android-release.aar

View File

@@ -10,14 +10,14 @@ on:
jobs:
build-ios:
name: build-ios
runs-on: macos-14-xlarge
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
- name: Run build script
run: |
cd build/ios && printf "y" | ./build.sh continuous
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-ios
path: out/filament-release-ios.tgz

View File

@@ -10,14 +10,14 @@ on:
jobs:
build-linux:
name: build-linux
runs-on: ubuntu-22.04-16core
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4.1.6
- name: Run build script
run: |
cd build/linux && printf "y" | ./build.sh continuous
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-linux
path: out/filament-release-linux.tgz

View File

@@ -10,14 +10,14 @@ on:
jobs:
build-mac:
name: build-mac
runs-on: macos-14-xlarge
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
- name: Run build script
run: |
cd build/mac && printf "y" | ./build.sh continuous
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-mac
path: out/filament-release-darwin.tgz

View File

@@ -15,7 +15,7 @@ jobs:
strategy:
matrix:
os: [macos-14-xlarge, ubuntu-22.04-16core]
os: [macos-14, ubuntu-22.04]
steps:
- uses: actions/checkout@v4.1.6
@@ -29,7 +29,7 @@ jobs:
build-windows:
name: build-windows
runs-on: win-2019-16core
runs-on: windows-2019
steps:
- uses: actions/checkout@v4.1.6
@@ -40,7 +40,7 @@ jobs:
build-android:
name: build-android
runs-on: ubuntu-22.04-16core
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
@@ -49,14 +49,12 @@ jobs:
distribution: 'temurin'
java-version: '17'
- name: Run build script
# Only build one 64-bit target during presubmit to cut down build times
# Continuous builds will build everything
run: |
cd build/android && printf "y" | ./build.sh presubmit arm64-v8a
cd build/android && printf "y" | ./build.sh presubmit
build-ios:
name: build-iOS
runs-on: macos-14-xlarge
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
@@ -69,25 +67,10 @@ jobs:
build-web:
name: build-web
runs-on: ubuntu-22.04-16core
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
- name: Run build script
run: |
cd build/web && printf "y" | ./build.sh presubmit
test-renderdiff:
name: test-renderdiff
runs-on: ubuntu-22.04-32core
steps:
- uses: actions/checkout@v4.1.6
- uses: ./.github/actions/ubuntu-apt-add-src
- name: Run script
run: |
source ./build/linux/ci-common.sh && bash test/renderdiff_tests.sh
- uses: actions/upload-artifact@v4
with:
name: presubmit-renderdiff-result
path: ./out/renderdiff_tests

View File

@@ -31,7 +31,7 @@ jobs:
strategy:
matrix:
os: [macos-14-xlarge, ubuntu-22.04-32core]
os: [macos-14, ubuntu-22.04]
steps:
- name: Decide Git ref
@@ -65,7 +65,7 @@ jobs:
build-web:
name: build-web
runs-on: ubuntu-22.04-16core
runs-on: macos-14
if: github.event_name == 'release' || github.event.inputs.platform == 'web'
steps:
@@ -98,7 +98,7 @@ jobs:
build-android:
name: build-android
runs-on: ubuntu-22.04-16core
runs-on: macos-14
if: github.event_name == 'release' || github.event.inputs.platform == 'android'
steps:
@@ -120,7 +120,7 @@ jobs:
env:
TAG: ${{ steps.git_ref.outputs.tag }}
run: |
cd build/android && printf "y" | ./build.sh release armeabi-v7a,arm64-v8a,x86,x86_64
cd build/android && printf "y" | ./build.sh release
cd ../..
mv out/filament-android-release.aar out/filament-${TAG}-android.aar
mv out/filamat-android-release.aar out/filamat-${TAG}-android.aar
@@ -152,7 +152,7 @@ jobs:
build-ios:
name: build-ios
runs-on: macos-14-xlarge
runs-on: macos-14
if: github.event_name == 'release' || github.event.inputs.platform == 'ios'
steps:
@@ -185,7 +185,7 @@ jobs:
build-windows:
name: build-windows
runs-on: windows-2019-32core
runs-on: windows-2019
if: github.event_name == 'release' || github.event.inputs.platform == 'windows'
steps:
@@ -205,7 +205,6 @@ jobs:
TAG: ${{ steps.git_ref.outputs.tag }}
run: |
build\windows\build-github.bat release
echo on
move out\filament-windows.tgz out\filament-%TAG%-windows.tgz
shell: cmd
- uses: actions/github-script@v6

View File

@@ -10,14 +10,14 @@ on:
jobs:
build-web:
name: build-web
runs-on: ubuntu-22.04-16core
runs-on: macos-14
steps:
- uses: actions/checkout@v4.1.6
- name: Run build script
run: |
cd build/web && printf "y" | ./build.sh continuous
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-web
path: out/filament-release-web.tgz

View File

@@ -10,7 +10,7 @@ on:
jobs:
build-windows:
name: build-windows
runs-on: windows-2019-32core
runs-on: windows-2019
steps:
- uses: actions/checkout@v4.1.6
@@ -18,7 +18,7 @@ jobs:
run: |
build\windows\build-github.bat continuous
shell: cmd
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v1.0.0
with:
name: filament-windows
path: out/filament-windows.tgz

View File

@@ -45,8 +45,6 @@ option(FILAMENT_ENABLE_FEATURE_LEVEL_0 "Enable Feature Level 0" ON)
option(FILAMENT_ENABLE_MULTIVIEW "Enable multiview for Filament" OFF)
option(FILAMENT_SUPPORTS_OSMESA "Enable OSMesa (headless GL context) for Filament" OFF)
set(FILAMENT_NDK_VERSION "" CACHE STRING
"Android NDK version or version prefix to be used when building for Android."
)
@@ -75,10 +73,6 @@ set(FILAMENT_BACKEND_DEBUG_FLAG "" CACHE STRING
"A debug flag meant for enabling/disabling backend debugging paths"
)
set(FILAMENT_OSMESA_PATH "" CACHE STRING
"Path to the OSMesa header and lib"
)
# Enable exceptions by default in spirv-cross.
set(SPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS OFF)
@@ -138,22 +132,12 @@ else()
endif()
if (LINUX)
if (NOT FILAMENT_OSMESA_PATH STREQUAL "")
if (NOT EXISTS ${FILAMENT_OSMESA_PATH}/)
message(FATAL_ERROR "Cannot find specified OSMesa build directory: ${FILAMENT_OSMESA_PATH}")
endif()
set(FILAMENT_SUPPORTS_OSMESA TRUE)
endif()
if (FILAMENT_SUPPORTS_WAYLAND)
add_definitions(-DFILAMENT_SUPPORTS_WAYLAND)
set(FILAMENT_SUPPORTS_X11 FALSE)
elseif (FILAMENT_SUPPORTS_EGL_ON_LINUX)
add_definitions(-DFILAMENT_SUPPORTS_EGL_ON_LINUX)
set(FILAMENT_SUPPORTS_X11 FALSE)
elseif (FILAMENT_SUPPORTS_OSMESA)
set(FILAMENT_SUPPORTS_X11 FALSE)
add_definitions(-DFILAMENT_SUPPORTS_OSMESA)
else ()
if (FILAMENT_SUPPORTS_XCB)
add_definitions(-DFILAMENT_SUPPORTS_XCB)

View File

@@ -7,4 +7,3 @@ for next branch cut* header.
appropriate header in [RELEASE_NOTES.md](./RELEASE_NOTES.md).
## Release notes for next branch cut
- vk: fix stage pool gc logic

View File

@@ -31,7 +31,7 @@ repositories {
}
dependencies {
implementation 'com.google.android.filament:filament-android:1.56.0'
implementation 'com.google.android.filament:filament-android:1.54.0'
}
```
@@ -51,9 +51,19 @@ Here are all the libraries available in the group `com.google.android.filament`:
iOS projects can use CocoaPods to install the latest release:
```shell
pod 'Filament', '~> 1.56.0'
pod 'Filament', '~> 1.54.0'
```
### Snapshots
If you prefer to live on the edge, you can download a continuous build by following these
steps:
1. Find the [commit](https://github.com/google/filament/commits/main) you're interested in.
2. Click the green check mark under the commit message.
3. Click on the _Details_ link for the platform you're interested in.
4. On the top left click _Summary_, then in the _Artifacts_ section choose the desired artifact.
## Documentation
- [Filament](https://google.github.io/filament/Filament.html), an in-depth explanation of

View File

@@ -7,32 +7,6 @@ A new header is inserted each time a *tag* is created.
Instead, if you are authoring a PR for the main branch, add your release note to
[NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).
## v1.56.1
## v1.56.0
- backend: descriptor layouts distinguish samplers and external samplers (b/376089915) [⚠️ **New Material Version**]
## v1.55.1
## v1.55.0
- Add descriptor sets to describe shader resources. [⚠️ **New Material Version**]
## v1.54.5
## v1.54.4
- Add support for multi-layered render target with array textures.
## v1.54.3
## v1.54.2
- Add a `name` API to Filament objects for debugging handle use-after-free assertions
## v1.54.1
@@ -660,7 +634,7 @@ Instead, if you are authoring a PR for the main branch, add your release note to
- engine: Binary size improvements.
- engine: Add basic support for instanced renderables [**NEW API**].
- engine: Fix, first imaged passed to `Stream::SetAcquiredImage` is ignored and leaked.
- engine: Fix, first image passed to `Stream::SetAcquiredImage` is ignored and leaked.
- Vulkan: Robustness improvements.
- Java: Fix, lookAt z axis negated.
- gltfio: Be graceful when model has > 4 weights per vert.

View File

@@ -83,12 +83,12 @@ buildscript {
'minSdk': 21,
'targetSdk': 34,
'compileSdk': 34,
'kotlin': '2.0.21',
'kotlin_coroutines': '1.9.0',
'buildTools': '35.0.0',
'kotlin': '2.0.0',
'kotlin_coroutines': '1.9.0-RC',
'buildTools': '34.0.0',
'ndk': '27.0.11718014',
'androidx_core': '1.13.1',
'androidx_annotations': '1.9.0'
'androidx_annotations': '1.8.0'
]
ext.deps = [
@@ -104,7 +104,7 @@ buildscript {
]
dependencies {
classpath 'com.android.tools.build:gradle:8.6.1'
classpath 'com.android.tools.build:gradle:8.4.1'
classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:${versions.kotlin}"
}

View File

@@ -499,37 +499,6 @@ Java_com_google_android_filament_Engine_nGetActiveFeatureLevel(JNIEnv *, jclass,
return (jint)engine->getActiveFeatureLevel();
}
// JNI bridge for Engine.nHasFeatureFlag(): reports whether the native Engine returns a value
// for the feature flag called `name_`.
//
// Returns JNI_TRUE when engine->getFeatureFlag() yields a value, JNI_FALSE otherwise. JNI_FALSE is
// also returned when the Java string cannot be converted to UTF-8 (see below).
extern "C"
JNIEXPORT jboolean JNICALL
Java_com_google_android_filament_Engine_nHasFeatureFlag(JNIEnv *env, jclass clazz,
        jlong nativeEngine, jstring name_) {
    Engine* engine = (Engine*) nativeEngine;
    const char *name = env->GetStringUTFChars(name_, nullptr);
    if (name == nullptr) {
        // Per the JNI specification GetStringUTFChars returns NULL on failure; bail out instead of
        // handing a null C string to the engine. There is nothing to release in that case.
        return JNI_FALSE;
    }
    std::optional<bool> result = engine->getFeatureFlag(name);
    env->ReleaseStringUTFChars(name_, name);
    return static_cast<jboolean>(result.has_value());
}
// JNI bridge for Engine.nSetFeatureFlag(): forwards `value` for the feature flag called `name_`
// to the native Engine.
//
// Returns whatever engine->setFeatureFlag() reports, or JNI_FALSE when the Java string cannot be
// converted to UTF-8 (see below).
extern "C"
JNIEXPORT jboolean JNICALL
Java_com_google_android_filament_Engine_nSetFeatureFlag(JNIEnv *env, jclass clazz,
        jlong nativeEngine, jstring name_, jboolean value) {
    Engine* engine = (Engine*) nativeEngine;
    const char *name = env->GetStringUTFChars(name_, nullptr);
    if (name == nullptr) {
        // Per the JNI specification GetStringUTFChars returns NULL on failure; the flag was not
        // set, so report failure rather than dereferencing a null name.
        return JNI_FALSE;
    }
    jboolean result = engine->setFeatureFlag(name, (bool)value);
    env->ReleaseStringUTFChars(name_, name);
    return result;
}
// JNI bridge for Engine.nGetFeatureFlag(): reads the value of the feature flag called `name_`
// from the native Engine.
//
// Returns the flag's value, or JNI_FALSE when the engine has no value for it or when the Java
// string cannot be converted to UTF-8 (see below).
extern "C"
JNIEXPORT jboolean JNICALL
Java_com_google_android_filament_Engine_nGetFeatureFlag(JNIEnv *env, jclass clazz,
        jlong nativeEngine, jstring name_) {
    Engine* engine = (Engine*) nativeEngine;
    const char *name = env->GetStringUTFChars(name_, nullptr);
    if (name == nullptr) {
        // Per the JNI specification GetStringUTFChars returns NULL on failure; never pass a null
        // name to the engine.
        return JNI_FALSE;
    }
    std::optional<bool> result = engine->getFeatureFlag(name);
    env->ReleaseStringUTFChars(name_, name);
    return static_cast<jboolean>(result.value_or(false)); // we should never fail here
}
extern "C" JNIEXPORT jlong JNICALL Java_com_google_android_filament_Engine_nCreateBuilder(JNIEnv*,
jclass) {
Engine::Builder* builder = new Engine::Builder{};
@@ -551,12 +520,13 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBuilderConfig(JNIEnv*,
jclass, jlong nativeBuilder, jlong commandBufferSizeMB, jlong perRenderPassArenaSizeMB,
jlong driverHandleArenaSizeMB, jlong minCommandBufferSizeMB, jlong perFrameCommandsSizeMB,
jlong jobSystemThreadCount, jboolean disableParallelShaderCompile,
jlong jobSystemThreadCount,
jlong textureUseAfterFreePoolSize, jboolean disableParallelShaderCompile,
jint stereoscopicType, jlong stereoscopicEyeCount,
jlong resourceAllocatorCacheSizeMB, jlong resourceAllocatorCacheMaxAge,
jboolean disableHandleUseAfterFreeCheck,
jint preferredShaderLanguage,
jboolean forceGLES2Context, jboolean assertNativeWindowIsValid) {
jboolean forceGLES2Context) {
Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
Engine::Config config = {
.commandBufferSizeMB = (uint32_t) commandBufferSizeMB,
@@ -565,6 +535,7 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
.minCommandBufferSizeMB = (uint32_t) minCommandBufferSizeMB,
.perFrameCommandsSizeMB = (uint32_t) perFrameCommandsSizeMB,
.jobSystemThreadCount = (uint32_t) jobSystemThreadCount,
.textureUseAfterFreePoolSize = (uint32_t) textureUseAfterFreePoolSize,
.disableParallelShaderCompile = (bool) disableParallelShaderCompile,
.stereoscopicType = (Engine::StereoscopicType) stereoscopicType,
.stereoscopicEyeCount = (uint8_t) stereoscopicEyeCount,
@@ -573,7 +544,6 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
.disableHandleUseAfterFreeCheck = (bool) disableHandleUseAfterFreeCheck,
.preferredShaderLanguage = (Engine::Config::ShaderLanguage) preferredShaderLanguage,
.forceGLES2Context = (bool) forceGLES2Context,
.assertNativeWindowIsValid = (bool) assertNativeWindowIsValid,
};
builder->config(&config);
}
@@ -596,16 +566,6 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
builder->paused((bool) paused);
}
// JNI bridge for Engine.Builder's nSetBuilderFeature(): records `value` for the feature flag
// called `name_` on the native Engine::Builder.
extern "C"
JNIEXPORT void JNICALL
Java_com_google_android_filament_Engine_nSetBuilderFeature(JNIEnv *env, jclass clazz,
        jlong nativeBuilder, jstring name_, jboolean value) {
    Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
    const char *name = env->GetStringUTFChars(name_, nullptr);
    if (name == nullptr) {
        // Per the JNI specification GetStringUTFChars returns NULL on failure; leave the builder
        // untouched rather than passing it a null name.
        return;
    }
    builder->feature(name, (bool)value);
    env->ReleaseStringUTFChars(name_, name);
}
extern "C" JNIEXPORT jlong JNICALL
Java_com_google_android_filament_Engine_nBuilderBuild(JNIEnv*, jclass, jlong nativeBuilder) {
Engine::Builder* builder = (Engine::Builder*) nativeBuilder;

View File

@@ -222,20 +222,6 @@ Java_com_google_android_filament_View_nIsFrontFaceWindingInverted(JNIEnv*,
return static_cast<jboolean>(view->isFrontFaceWindingInverted());
}
// JNI bridge for View.nSetTransparentPickingEnabled(): forwards the flag to the native View
// addressed by `nativeView`.
extern "C" JNIEXPORT void JNICALL
Java_com_google_android_filament_View_nSetTransparentPickingEnabled(JNIEnv*,
        jclass, jlong nativeView, jboolean enabled) {
    auto* const nativeViewPtr = reinterpret_cast<View*>(nativeView);
    nativeViewPtr->setTransparentPickingEnabled(enabled);
}
// JNI bridge for View.nIsTransparentPickingEnabled(): returns the native View's
// isTransparentPickingEnabled() state as a jboolean.
extern "C" JNIEXPORT jboolean JNICALL
Java_com_google_android_filament_View_nIsTransparentPickingEnabled(JNIEnv*,
        jclass, jlong nativeView) {
    auto* const nativeViewPtr = reinterpret_cast<View*>(nativeView);
    const auto pickingEnabled = nativeViewPtr->isTransparentPickingEnabled();
    return static_cast<jboolean>(pickingEnabled);
}
extern "C" JNIEXPORT void JNICALL
Java_com_google_android_filament_View_nSetAmbientOcclusion(JNIEnv*, jclass, jlong nativeView, jint ordinal) {
View* view = (View*) nativeView;

View File

@@ -224,12 +224,13 @@ public class Engine {
nSetBuilderConfig(mNativeBuilder, config.commandBufferSizeMB,
config.perRenderPassArenaSizeMB, config.driverHandleArenaSizeMB,
config.minCommandBufferSizeMB, config.perFrameCommandsSizeMB,
config.jobSystemThreadCount, config.disableParallelShaderCompile,
config.jobSystemThreadCount,
config.textureUseAfterFreePoolSize, config.disableParallelShaderCompile,
config.stereoscopicType.ordinal(), config.stereoscopicEyeCount,
config.resourceAllocatorCacheSizeMB, config.resourceAllocatorCacheMaxAge,
config.disableHandleUseAfterFreeCheck,
config.preferredShaderLanguage.ordinal(),
config.forceGLES2Context, config.assertNativeWindowIsValid);
config.forceGLES2Context);
return this;
}
@@ -258,17 +259,6 @@ public class Engine {
return this;
}
/**
* Sets a feature flag value on this Builder. This is the only way to set constant feature flags.
* The name and value are forwarded unchanged to the native builder.
* @param name feature name
* @param value true to enable, false to disable
* @return A reference to this Builder for chaining calls.
*/
public Builder feature(@NonNull String name, boolean value) {
nSetBuilderFeature(mNativeBuilder, name, value);
return this;
}
/**
* Creates an instance of Engine
*
@@ -404,7 +394,6 @@ public class Engine {
/**
* Set to `true` to forcibly disable parallel shader compilation in the backend.
* Currently only honored by the GL backend.
* @Deprecated use "backend.disable_parallel_shader_compile" feature flag instead
*/
public boolean disableParallelShaderCompile = false;
@@ -430,12 +419,12 @@ public class Engine {
*/
public long stereoscopicEyeCount = 2;
/**
/*
* @Deprecated This value is no longer used.
*/
public long resourceAllocatorCacheSizeMB = 64;
/**
/*
* This value determines how many frames texture entries are kept for in the cache. This
* is a soft limit, meaning some texture older than this are allowed to stay in the cache.
* Typically only one texture is evicted per frame.
@@ -443,13 +432,12 @@ public class Engine {
*/
public long resourceAllocatorCacheMaxAge = 1;
/**
/*
* Disable backend handles use-after-free checks.
* @Deprecated use "backend.disable_handle_use_after_free_check" feature flag instead
*/
public boolean disableHandleUseAfterFreeCheck = false;
/**
/*
* Sets a preferred shader language for Filament to use.
*
* The Metal backend supports two shader languages: MSL (Metal Shading Language) and
@@ -471,20 +459,12 @@ public class Engine {
};
public ShaderLanguage preferredShaderLanguage = ShaderLanguage.DEFAULT;
/**
/*
* When the OpenGL ES backend is used, setting this value to true will force a GLES2.0
* context if supported by the Platform, or if not, will have the backend pretend
* it's a GLES2 context. Ignored on other backends.
*/
public boolean forceGLES2Context = false;
/**
* Assert the native window associated to a SwapChain is valid when calling makeCurrent().
* This is only supported for:
* - PlatformEGLAndroid
* @Deprecated use "backend.opengl.assert_native_window_is_valid" feature flag instead
*/
public boolean assertNativeWindowIsValid = false;
}
private Engine(long nativeEngine, Config config) {
@@ -707,11 +687,11 @@ public class Engine {
/**
* Returns the maximum number of stereoscopic eyes supported by Filament. The actual number of
* eyes rendered is set at Engine creation time with the {@link Config#stereoscopicEyeCount}
* setting.
* eyes rendered is set at Engine creation time with the {@link
* Engine#Config#stereoscopicEyeCount} setting.
*
* @return the max number of stereoscopic eyes supported
* @see Config#stereoscopicEyeCount
* @see Engine#Config#stereoscopicEyeCount
*/
public long getMaxStereoscopicEyes() {
return nGetMaxStereoscopicEyes(getNativeObject());
@@ -908,8 +888,7 @@ public class Engine {
/**
* Returns whether the object is valid.
* @param ma Material
* @param mi MaterialInstance to check for validity
* @param object Object to check for validity
* @return returns true if the specified object is valid.
*/
public boolean isValidMaterialInstance(@NonNull Material ma, MaterialInstance mi) {
@@ -1331,39 +1310,6 @@ public class Engine {
*/
public static native long getSteadyClockTimeNano();
/**
* Checks whether a feature flag with the given name exists.
* @param name name of the feature flag to check
* @return true if it exists, false otherwise
*/
public boolean hasFeatureFlag(@NonNull String name) {
return nHasFeatureFlag(mNativeObject, name);
}
/**
* Sets the value of a non-constant feature flag.
* @param name name of the feature flag to set
* @param value value to set
* @return true if the value was set, false if the feature flag is constant or doesn't exist.
*/
public boolean setFeatureFlag(@NonNull String name, boolean value) {
return nSetFeatureFlag(mNativeObject, name, value);
}
/**
 * Looks up the current value of a feature flag.
 * @param name name of the feature flag
 * @return the value of the flag if it exists
 * @exception IllegalArgumentException is thrown if the feature flag doesn't exist
 */
public boolean getFeatureFlag(@NonNull String name) {
    if (hasFeatureFlag(name)) {
        return nGetFeatureFlag(mNativeObject, name);
    }
    // Unknown flags are rejected up front so the native getter is only queried for existing ones.
    throw new IllegalArgumentException("The feature flag \"" + name + "\" doesn't exist");
}
@UsedByReflection("TextureHelper.java")
public long getNativeObject() {
if (mNativeObject == 0) {
@@ -1453,9 +1399,6 @@ public class Engine {
private static native int nGetSupportedFeatureLevel(long nativeEngine);
private static native int nSetActiveFeatureLevel(long nativeEngine, int ordinal);
private static native int nGetActiveFeatureLevel(long nativeEngine);
private static native boolean nHasFeatureFlag(long nativeEngine, String name);
private static native boolean nSetFeatureFlag(long nativeEngine, String name, boolean value);
private static native boolean nGetFeatureFlag(long nativeEngine, String name);
private static native long nCreateBuilder();
private static native void nDestroyBuilder(long nativeBuilder);
@@ -1463,14 +1406,14 @@ public class Engine {
private static native void nSetBuilderConfig(long nativeBuilder, long commandBufferSizeMB,
long perRenderPassArenaSizeMB, long driverHandleArenaSizeMB,
long minCommandBufferSizeMB, long perFrameCommandsSizeMB, long jobSystemThreadCount,
boolean disableParallelShaderCompile, int stereoscopicType, long stereoscopicEyeCount,
long textureUseAfterFreePoolSize, boolean disableParallelShaderCompile,
int stereoscopicType, long stereoscopicEyeCount,
long resourceAllocatorCacheSizeMB, long resourceAllocatorCacheMaxAge,
boolean disableHandleUseAfterFreeCheck,
int preferredShaderLanguage,
boolean forceGLES2Context, boolean assertNativeWindowIsValid);
boolean forceGLES2Context);
private static native void nSetBuilderFeatureLevel(long nativeBuilder, int ordinal);
private static native void nSetBuilderSharedContext(long nativeBuilder, long sharedContext);
private static native void nSetBuilderPaused(long nativeBuilder, boolean paused);
private static native void nSetBuilderFeature(long nativeBuilder, String name, boolean value);
private static native long nBuilderBuild(long nativeBuilder);
}

View File

@@ -745,33 +745,6 @@ public class View {
nSetFrontFaceWindingInverted(getNativeObject(), inverted);
}
/**
* Returns true if transparent picking is enabled.
*
* @return true if transparent picking is enabled for this view, false otherwise.
*
* @see #setTransparentPickingEnabled
*/
public boolean isTransparentPickingEnabled() {
return nIsTransparentPickingEnabled(getNativeObject());
}
/**
* Enables or disables transparent picking. Disabled by default.
*
* When transparent picking is enabled, View::pick() will pick from both
* transparent and opaque renderables. When disabled, View::pick() will only
* pick from opaque renderables.
*
* <p>
* Transparent picking will create an extra pass for rendering depth
* from both transparent and opaque renderables.
* </p>
*
* @param enabled true enables transparent picking, false disables it.
*
* @see #isTransparentPickingEnabled
*/
public void setTransparentPickingEnabled(boolean enabled) {
nSetTransparentPickingEnabled(getNativeObject(), enabled);
}
/**
* Sets options relative to dynamic lighting for this view.
*
@@ -1308,8 +1281,6 @@ public class View {
private static native boolean nIsPostProcessingEnabled(long nativeView);
private static native void nSetFrontFaceWindingInverted(long nativeView, boolean inverted);
private static native boolean nIsFrontFaceWindingInverted(long nativeView);
private static native void nSetTransparentPickingEnabled(long nativeView, boolean enabled);
private static native boolean nIsTransparentPickingEnabled(long nativeView);
private static native void nSetAmbientOcclusion(long nativeView, int ordinal);
private static native int nGetAmbientOcclusion(long nativeView);
private static native void nSetAmbientOcclusionOptions(long nativeView, float radius, float bias, float power, float resolution, float intensity, float bilateralThreshold, int quality, int lowPassFilter, int upsampling, boolean enabled, boolean bentNormals, float minHorizonAngleRad);

View File

@@ -1,5 +1,5 @@
GROUP=com.google.android.filament
VERSION_NAME=1.56.0
VERSION_NAME=1.54.0
POM_DESCRIPTION=Real-time physically based rendering engine for Android.

View File

@@ -1,6 +1,6 @@
#Wed Nov 17 10:40:18 PST 2021
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

View File

@@ -6,10 +6,6 @@ plugins {
project.ext.isSample = true
kotlin {
jvmToolchain(versions.jdk)
}
filamentTools {
cmgenArgs = "-q --format=ktx --size=256 --extract-blur=0.1 --deploy=src/main/assets/envs/default_env"
iblInputFile = project.layout.projectDirectory.file("../../../third_party/environments/lightroom_14b.hdr")
@@ -17,7 +13,7 @@ filamentTools {
}
// don't forget to update MainActivity.kt when/if changing this.
tasks.register('copyMesh', Copy) {
task copyMesh(type: Copy) {
from "../../../third_party/models/BusterDrone"
into "src/main/assets/models"
}

View File

@@ -112,13 +112,7 @@ class MainActivity : Activity() {
}
private fun setupFilament() {
val config = Engine.Config()
//config.forceGLES2Context = true
engine = Engine.Builder()
.config(config)
.featureLevel(Engine.FeatureLevel.FEATURE_LEVEL_0)
.build()
engine = Engine.Builder().featureLevel(Engine.FeatureLevel.FEATURE_LEVEL_0).build()
renderer = engine.createRenderer()
scene = engine.createScene()
view = engine.createView()
@@ -129,9 +123,7 @@ class MainActivity : Activity() {
scene.skybox = Skybox.Builder().color(0.035f, 0.035f, 0.035f, 1.0f).build(engine)
// post-processing is not supported at feature level 0
if (engine.activeFeatureLevel == Engine.FeatureLevel.FEATURE_LEVEL_0) {
view.isPostProcessingEnabled = false
}
view.isPostProcessingEnabled = false
// Tell the view which camera we want to use
view.camera = camera

View File

@@ -64,9 +64,6 @@ function print_help {
echo " enabling debug paths in the backend from the build script. For example, make a"
echo " systrace-enabled build without directly changing #defines. Remember to add -f when"
echo " changing this option."
echo " -X osmesa_path"
echo " Indicates a path to a completed OSMesa build. OSMesa is used to create an offscreen GL"
echo " context for software rasterization"
echo " -S type"
echo " Enable stereoscopic rendering where type is one of [instanced|multiview]. This is only"
echo " meant for building the samples."
@@ -183,8 +180,6 @@ BACKEND_DEBUG_FLAG_OPTION=""
STEREOSCOPIC_OPTION=""
OSMESA_OPTION=""
IOS_BUILD_SIMULATOR=false
BUILD_UNIVERSAL_LIBRARIES=false
@@ -245,7 +240,6 @@ function build_desktop_target {
${ASAN_UBSAN_OPTION} \
${BACKEND_DEBUG_FLAG_OPTION} \
${STEREOSCOPIC_OPTION} \
${OSMESA_OPTION} \
${architectures} \
../..
ln -sf "out/cmake-${lc_target}/compile_commands.json" \
@@ -802,7 +796,7 @@ function check_debug_release_build {
pushd "$(dirname "$0")" > /dev/null
while getopts ":hacCfgijmp:q:uvslwedk:bx:S:X:" opt; do
while getopts ":hacCfgijmp:q:uvslwedk:bx:S:" opt; do
case ${opt} in
h)
print_help
@@ -956,8 +950,6 @@ while getopts ":hacCfgijmp:q:uvslwedk:bx:S:X:" opt; do
exit 1
esac
;;
X) OSMESA_OPTION="-DFILAMENT_OSMESA_PATH=${OPTARG}"
;;
\?)
echo "Invalid option: -${OPTARG}" >&2
echo ""

View File

@@ -60,7 +60,13 @@ if [[ ! -d "${ANDROID_HOME}/ndk/$FILAMENT_NDK_VERSION" ]]; then
yes | ${ANDROID_HOME}/cmdline-tools/latest/bin/sdkmanager --licenses
${ANDROID_HOME}/cmdline-tools/latest/bin/sdkmanager "ndk;$FILAMENT_NDK_VERSION"
fi
# Only build one 64-bit target during presubmit to cut down build times
# Continuous builds will build everything
ANDROID_ABIS=
if [[ "$TARGET" == "presubmit" ]]; then
ANDROID_ABIS="-q arm64-v8a"
fi
# Build the Android sample-gltf-viewer APK during release.
BUILD_SAMPLES=
@@ -68,19 +74,5 @@ if [[ "$TARGET" == "release" ]]; then
BUILD_SAMPLES="-k sample-gltf-viewer"
fi
# Builds the Android targets, debug and/or release depending on BUILD_DEBUG / BUILD_RELEASE.
#   $1 - comma-separated ABI list forwarded to build.sh through -q. When it is empty the -q
#        option is omitted entirely, so that a bare "-q" cannot swallow the following "-c" as
#        its argument (build.sh is then expected to use its own default ABIs -- confirm).
function build_android() {
    local ABI=$1

    # Intentionally left unquoted at the call sites below: it expands to either nothing or the
    # two words "-q <abis>".
    local ABI_OPTION=
    if [[ -n "${ABI}" ]]; then
        ABI_OPTION="-q ${ABI}"
    fi

    # Do the following in two steps so that we do not run out of space
    if [[ -n "${BUILD_DEBUG}" ]]; then
        FILAMENT_NDK_VERSION=${FILAMENT_NDK_VERSION} ./build.sh -p android ${ABI_OPTION} -c ${BUILD_SAMPLES} ${GENERATE_ARCHIVES} ${BUILD_DEBUG}
        rm -rf out/cmake-android-debug-*
    fi
    if [[ -n "${BUILD_RELEASE}" ]]; then
        FILAMENT_NDK_VERSION=${FILAMENT_NDK_VERSION} ./build.sh -p android ${ABI_OPTION} -c ${BUILD_SAMPLES} ${GENERATE_ARCHIVES} ${BUILD_RELEASE}
        rm -rf out/cmake-android-release-*
    fi
}
pushd `dirname $0`/../.. > /dev/null
build_android $2
FILAMENT_NDK_VERSION=${FILAMENT_NDK_VERSION} ./build.sh -p android $ANDROID_ABIS -c $BUILD_SAMPLES $GENERATE_ARCHIVES $BUILD_DEBUG $BUILD_RELEASE

View File

@@ -1,6 +1,7 @@
#!/bin/bash
if [ `uname` == "Linux" ];then
source `dirname $0`/../linux/ci-common.sh
curl -OL https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip
unzip -q ninja-linux.zip
elif [ `uname` == "Darwin" ];then
curl -OL https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-mac.zip
unzip -q ninja-mac.zip
@@ -12,6 +13,9 @@ fi
chmod +x ninja
export PATH="$PWD:$PATH"
# FIXME: kokoro machines have node and npm but currently they are symlinked to non-existent files
# npm install -g typescript
# Install emscripten.
curl -L https://github.com/emscripten-core/emsdk/archive/refs/tags/3.1.15.zip > emsdk.zip
unzip emsdk.zip ; mv emsdk-* emsdk ; cd emsdk

View File

@@ -115,23 +115,17 @@ cmake ..\.. ^
-DFILAMENT_SUPPORTS_VULKAN=ON ^
|| exit /b
set build_flags=-j %NUMBER_OF_PROCESSORS%
@echo on
:: we've upgraded the windows machines, so the following are no longer accurate as of 09/19/24, but
:: keeping around the comment for record.
:: Attempt to fix "error C1060: compiler is out of heap space" seen on CI.
:: Some resource libraries require significant heap space to compile, so first compile them serially.
:: cmake --build . --target filagui --config %config% %build_flags% || exit /b
:: cmake --build . --target uberarchive --config %config% %build_flags% || exit /b
:: cmake --build . --target gltf-demo-resources --config %config% %build_flags% || exit /b
:: cmake --build . --target filamentapp-resources --config %config% %build_flags% || exit /b
:: cmake --build . --target sample-resources --config %config% %build_flags% || exit /b
:: cmake --build . --target suzanne-resources --config %config% %build_flags% || exit /b
@echo on
cmake --build . --target filagui --config %config% || exit /b
cmake --build . --target uberarchive --config %config% || exit /b
cmake --build . --target gltf-demo-resources --config %config% || exit /b
cmake --build . --target filamentapp-resources --config %config% || exit /b
cmake --build . --target sample-resources --config %config% || exit /b
cmake --build . --target suzanne-resources --config %config% || exit /b
cmake --build . %INSTALL% --config %config% %build_flags% -- /m || exit /b
cmake --build . %INSTALL% --config %config% -- /m || exit /b
@echo off
echo Disk info after building variant: %variant%

View File

@@ -1308,12 +1308,7 @@ Description
declare a variable called `eyeDirection` you can access it in the fragment shader using
`variable_eyeDirection`. In the vertex shader, the interpolant name is simply a member of
the `MaterialVertexInputs` structure (`material.eyeDirection` in your example). Each
interpolant is of type `float4` (`vec4`) in the shaders. By default the precision of the
interpolant is `highp` in *both* the vertex and fragment shaders.
An alternate syntax can be used to specify both the name and precision of the interpolant.
In this case the specified precision is used as-is in both the fragment and vertex stages. In
particular, if `default` is specified, the default precision is used in the fragment shader
(`mediump`) and in the vertex shader (`highp`).
interpolant is of type `float4` (`vec4`) in the shaders.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ JSON
material {
@@ -1325,11 +1320,7 @@ material {
}
],
variables : [
eyeDirection,
{
name : eyeColor,
precision : medium
}
eyeDirection
],
vertexDomain : device,
depthWrite : false,

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -61,12 +61,10 @@ set(SRCS
src/Engine.cpp
src/Exposure.cpp
src/Fence.cpp
src/FilamentBuilder.cpp
src/FrameInfo.cpp
src/FrameSkipper.cpp
src/Froxelizer.cpp
src/Frustum.cpp
src/HwDescriptorSetLayoutFactory.cpp
src/HwRenderPrimitiveFactory.cpp
src/HwVertexBufferInfoFactory.cpp
src/IndexBuffer.cpp
@@ -77,6 +75,8 @@ set(SRCS
src/MaterialInstance.cpp
src/MaterialParser.cpp
src/MorphTargetBuffer.cpp
src/PerViewUniforms.cpp
src/PerShadowMapUniforms.cpp
src/PostProcessManager.cpp
src/RenderPass.cpp
src/RenderPrimitive.cpp
@@ -125,12 +125,6 @@ set(SRCS
src/details/Texture.cpp
src/details/VertexBuffer.cpp
src/details/View.cpp
src/ds/ColorPassDescriptorSet.cpp
src/ds/DescriptorSet.cpp
src/ds/DescriptorSetLayout.cpp
src/ds/PostProcessDescriptorSet.cpp
src/ds/ShadowMapDescriptorSet.cpp
src/ds/SsrPassDescriptorSet.cpp
src/fg/Blackboard.cpp
src/fg/DependencyGraph.cpp
src/fg/FrameGraph.cpp
@@ -154,21 +148,23 @@ set(PRIVATE_HDRS
src/FrameInfo.h
src/FrameSkipper.h
src/Froxelizer.h
src/HwDescriptorSetLayoutFactory.h
src/HwRenderPrimitiveFactory.h
src/HwVertexBufferInfoFactory.h
src/Intersections.h
src/MaterialParser.h
src/PerViewUniforms.h
src/PerShadowMapUniforms.h
src/PIDController.h
src/PostProcessManager.h
src/RendererUtils.h
src/RenderPass.h
src/RenderPrimitive.h
src/RendererUtils.h
src/ResourceAllocator.h
src/ResourceList.h
src/ShadowMap.h
src/ShadowMapManager.h
src/SharedHandle.h
src/TypedUniformBuffer.h
src/UniformBuffer.h
src/components/CameraManager.h
src/components/LightManager.h
@@ -196,14 +192,6 @@ set(PRIVATE_HDRS
src/details/Texture.h
src/details/VertexBuffer.h
src/details/View.h
src/downcast.h
src/ds/ColorPassDescriptorSet.h
src/ds/DescriptorSetLayout.h
src/ds/PostProcessDescriptorSet.h
src/ds/ShadowMapDescriptorSet.h
src/ds/SsrPassDescriptorSet.h
src/ds/TypedBuffer.h
src/ds/TypedUniformBuffer.h
src/fg/Blackboard.h
src/fg/FrameGraph.h
src/fg/FrameGraphId.h
@@ -221,6 +209,7 @@ set(PRIVATE_HDRS
src/materials/fsr/ffx_a.h
src/materials/fsr/ffx_fsr1.h
src/materials/fsr/ffx_fsr1_mobile.fs
src/downcast.h
)
set(MATERIAL_SRCS
@@ -308,11 +297,6 @@ if (FILAMENT_ENABLE_MULTIVIEW)
add_definitions(-DFILAMENT_ENABLE_MULTIVIEW)
endif()
# Whether to force the profiling mode.
if (FILAMENT_FORCE_PROFILING_MODE)
add_definitions(-DFILAMENT_FORCE_PROFILING_MODE)
endif()
# ==================================================================================================
# Definitions
# ==================================================================================================
@@ -612,13 +596,6 @@ else()
-Wover-aligned
-Werror
)
if (CMAKE_CXX_STANDARD EQUAL 20)
# The lambdas for passes in PostProcessManager.cpp capture this
# implicitly in a way that's deprecated in c++20, but can't easily be
# fixed in a way that's backwards compatible with c++17:
# https://www.nextptr.com/tutorial/ta1430524603/capture-this-in-lambda-expression-timeline-of-change
list(APPEND FILAMENT_WARNINGS -Wno-deprecated-this-capture)
endif()
endif()
target_compile_options(${TARGET} PRIVATE

View File

@@ -12,7 +12,6 @@ set(PUBLIC_HDRS
include/backend/AcquiredImage.h
include/backend/BufferDescriptor.h
include/backend/CallbackHandler.h
include/backend/DescriptorSetOffsetArray.h
include/backend/DriverApiForward.h
include/backend/DriverEnums.h
include/backend/Handle.h
@@ -70,13 +69,9 @@ set(PRIVATE_HDRS
if (FILAMENT_SUPPORTS_OPENGL AND NOT FILAMENT_USE_EXTERNAL_GLES3)
list(APPEND SRCS
include/backend/platforms/OpenGLPlatform.h
src/opengl/BindingMap.h
src/opengl/gl_headers.cpp
src/opengl/gl_headers.h
src/opengl/GLBufferObject.h
src/opengl/GLDescriptorSet.cpp
src/opengl/GLDescriptorSet.h
src/opengl/GLDescriptorSetLayout.h
src/opengl/GLTexture.h
src/opengl/GLUtils.cpp
src/opengl/GLUtils.h
@@ -119,8 +114,6 @@ if (FILAMENT_SUPPORTS_OPENGL AND NOT FILAMENT_USE_EXTERNAL_GLES3)
list(APPEND SRCS src/opengl/platforms/PlatformGLX.cpp)
elseif (FILAMENT_SUPPORTS_EGL_ON_LINUX)
list(APPEND SRCS src/opengl/platforms/PlatformEGLHeadless.cpp)
elseif (FILAMENT_SUPPORTS_OSMESA)
list(APPEND SRCS src/opengl/platforms/PlatformOSMesa.cpp)
endif()
elseif (WIN32)
list(APPEND SRCS src/opengl/platforms/PlatformWGL.cpp)
@@ -179,18 +172,11 @@ if (FILAMENT_SUPPORTS_VULKAN)
src/vulkan/caching/VulkanDescriptorSetManager.h
src/vulkan/caching/VulkanPipelineLayoutCache.cpp
src/vulkan/caching/VulkanPipelineLayoutCache.h
src/vulkan/memory/ResourceManager.cpp
src/vulkan/memory/ResourceManager.h
src/vulkan/memory/ResourcePointer.h
src/vulkan/memory/Resource.cpp
src/vulkan/memory/Resource.h
src/vulkan/platform/VulkanPlatform.cpp
src/vulkan/platform/VulkanPlatformSwapChainImpl.cpp
src/vulkan/platform/VulkanPlatformSwapChainImpl.h
src/vulkan/spirv/VulkanSpirvUtils.cpp
src/vulkan/spirv/VulkanSpirvUtils.h
src/vulkan/VulkanAsyncHandles.cpp
src/vulkan/VulkanAsyncHandles.h
src/vulkan/VulkanBlitter.cpp
src/vulkan/VulkanBlitter.h
src/vulkan/VulkanBuffer.cpp
@@ -221,6 +207,9 @@ if (FILAMENT_SUPPORTS_VULKAN)
src/vulkan/VulkanSwapChain.h
src/vulkan/VulkanReadPixels.cpp
src/vulkan/VulkanReadPixels.h
src/vulkan/VulkanResourceAllocator.h
src/vulkan/VulkanResources.cpp
src/vulkan/VulkanResources.h
src/vulkan/VulkanTexture.cpp
src/vulkan/VulkanTexture.h
src/vulkan/VulkanUtility.cpp
@@ -372,11 +361,6 @@ set(LINUX_LINKER_OPTIMIZATION_FLAGS
-Wl,--exclude-libs,bluegl
)
if (LINUX AND FILAMENT_SUPPORTS_OSMESA)
set(OSMESA_COMPILE_FLAGS
-I${FILAMENT_OSMESA_PATH}/include/GL)
endif()
if (MSVC)
set(FILAMENT_WARNINGS /W3)
else()
@@ -388,9 +372,6 @@ else()
-Wover-aligned
-Werror
)
if (CMAKE_CXX_STANDARD EQUAL 20)
list(APPEND FILAMENT_WARNINGS -Wno-deprecated-this-capture)
endif()
endif()
if (APPLE)
@@ -400,7 +381,6 @@ endif()
target_compile_options(${TARGET} PRIVATE
${FILAMENT_WARNINGS}
${OSMESA_COMPILE_FLAGS}
$<$<CONFIG:Release>:${OPTIMIZATION_FLAGS}>
$<$<AND:$<PLATFORM_ID:Darwin>,$<CONFIG:Release>>:${DARWIN_OPTIMIZATION_FLAGS}>
)
@@ -410,7 +390,6 @@ if (FILAMENT_SUPPORTS_METAL)
endif()
target_link_libraries(${TARGET} PRIVATE
${OSMESA_LINKER_FLAGS}
$<$<AND:$<PLATFORM_ID:Linux>,$<CONFIG:Release>>:${LINUX_LINKER_OPTIMIZATION_FLAGS}>
)
@@ -445,7 +424,6 @@ if (APPLE OR LINUX)
test/test_StencilBuffer.cpp
test/test_Scissor.cpp
test/test_MipLevels.cpp
test/test_Handles.cpp
)
set(BACKEND_TEST_LIBS
backend
@@ -517,40 +495,21 @@ endif()
# ==================================================================================================
# Compute tests
#
#if (NOT IOS AND NOT WEBGL)
#
#add_executable(compute_test
# test/ComputeTest.cpp
# test/Arguments.cpp
# test/test_ComputeBasic.cpp
# )
#
#target_link_libraries(compute_test PRIVATE
# backend
# getopt
# gtest
# )
#
#set_target_properties(compute_test PROPERTIES FOLDER Tests)
#
#endif()
# ==================================================================================================
# Metal utils tests
if (NOT IOS AND NOT WEBGL)
if (APPLE AND NOT IOS)
add_executable(compute_test
test/ComputeTest.cpp
test/Arguments.cpp
test/test_ComputeBasic.cpp
)
add_executable(metal_utils_test test/MetalTest.mm)
target_compile_options(metal_utils_test PRIVATE "-fobjc-arc")
target_link_libraries(metal_utils_test PRIVATE
target_link_libraries(compute_test PRIVATE
backend
getopt
gtest
)
set_target_properties(metal_utils_test PROPERTIES FOLDER Tests)
set_target_properties(compute_test PROPERTIES FOLDER Tests)
endif()

View File

@@ -1,101 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_COMMANDSTREAMVECTOR_H
#define TNT_FILAMENT_BACKEND_COMMANDSTREAMVECTOR_H
#include <backend/DriverApiForward.h>
#include <initializer_list>
#include <memory>
#include <stddef.h>
#include <stdint.h>
namespace filament::backend {
void* allocateFromCommandStream(DriverApi& driver, size_t size, size_t alignment) noexcept;
/**
 * Move-only array of uint32_t offsets whose storage comes from allocateFromCommandStream().
 *
 * Only the pointer to the first element is stored: the element count is not recorded, so
 * element access is unchecked, and nothing in this class ever releases the storage (the
 * destructor is defaulted and clear() merely forgets the pointer).
 * NOTE(review): presumably the command stream owns and recycles the memory -- confirm.
 */
class DescriptorSetOffsetArray {
public:
    using value_type = uint32_t;
    using size_type = uint32_t;
    using difference_type = int32_t;
    using reference = value_type&;
    using const_reference = value_type const&;
    using pointer = value_type*;
    using const_pointer = value_type const*;
    using iterator = pointer;
    using const_iterator = const_pointer;

    //! Creates an empty array (no storage attached).
    DescriptorSetOffsetArray() noexcept = default;

    ~DescriptorSetOffsetArray() noexcept = default;

    //! Allocates `size` offsets from the command stream of `driver` and sets them all to 0.
    DescriptorSetOffsetArray(size_type size, DriverApi& driver) noexcept
            : mOffsets(static_cast<pointer>(allocateFromCommandStream(
                      driver, size * sizeof(value_type), alignof(value_type)))) {
        std::uninitialized_fill_n(mOffsets, size, 0);
    }

    //! Allocates list.size() offsets from the command stream of `driver` and copies `list`.
    DescriptorSetOffsetArray(std::initializer_list<uint32_t> list, DriverApi& driver) noexcept
            : mOffsets(static_cast<pointer>(allocateFromCommandStream(
                      driver, list.size() * sizeof(value_type), alignof(value_type)))) {
        std::uninitialized_copy(list.begin(), list.end(), mOffsets);
    }

    // Copying is disabled; the storage pointer can only be handed over by moving.
    DescriptorSetOffsetArray(DescriptorSetOffsetArray const&) = delete;
    DescriptorSetOffsetArray& operator=(DescriptorSetOffsetArray const&) = delete;

    //! Takes over the storage of `rhs`, leaving `rhs` empty.
    DescriptorSetOffsetArray(DescriptorSetOffsetArray&& rhs) noexcept
            : mOffsets(rhs.mOffsets) {
        rhs.mOffsets = nullptr;
    }

    //! Takes over the storage of `rhs`, leaving `rhs` empty. Self-assignment is a no-op.
    DescriptorSetOffsetArray& operator=(DescriptorSetOffsetArray&& rhs) noexcept {
        if (this == &rhs) {
            return *this;
        }
        mOffsets = rhs.mOffsets;
        rhs.mOffsets = nullptr;
        return *this;
    }

    //! True when no storage is attached.
    bool empty() const noexcept { return mOffsets == nullptr; }

    //! Pointer to the first offset, or nullptr when empty.
    value_type* data() noexcept { return mOffsets; }
    const value_type* data() const noexcept { return mOffsets; }

    //! Unchecked element access.
    reference operator[](size_type n) noexcept {
        return mOffsets[n];
    }

    const_reference operator[](size_type n) const noexcept {
        return mOffsets[n];
    }

    //! Detaches the storage without releasing it.
    void clear() noexcept {
        mOffsets = nullptr;
    }

private:
    value_type *mOffsets = nullptr;
};
} // namespace filament::backend
#endif //TNT_FILAMENT_BACKEND_COMMANDSTREAMVECTOR_H

View File

@@ -19,16 +19,13 @@
#ifndef TNT_FILAMENT_BACKEND_DRIVERENUMS_H
#define TNT_FILAMENT_BACKEND_DRIVERENUMS_H
#include <utils/BitmaskEnum.h>
#include <utils/unwindows.h> // Because we define ERROR in the FenceStatus enum.
#include <backend/Platform.h>
#include <backend/PresentCallable.h>
#include <utils/BitmaskEnum.h>
#include <utils/FixedCapacityVector.h>
#include <utils/Invocable.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/ostream.h>
#include <math/vec4.h>
@@ -100,8 +97,6 @@ static constexpr size_t MAX_VERTEX_ATTRIBUTE_COUNT = 16; // This is guarantee
static constexpr size_t MAX_SAMPLER_COUNT = 62; // Maximum needed at feature level 3.
static constexpr size_t MAX_VERTEX_BUFFER_COUNT = 16; // Max number of bound buffer objects.
static constexpr size_t MAX_SSBO_COUNT = 4; // This is guaranteed by OpenGL ES.
static constexpr size_t MAX_DESCRIPTOR_SET_COUNT = 4; // This is guaranteed by Vulkan.
static constexpr size_t MAX_DESCRIPTOR_COUNT = 64; // per set
static constexpr size_t MAX_PUSH_CONSTANT_COUNT = 32; // Vulkan 1.1 spec allows for 128-byte
// of push constant (we assume 4-byte
@@ -196,71 +191,6 @@ static constexpr const char* shaderLanguageToString(ShaderLanguage shaderLanguag
}
}
//! Individual shader stage of a program.
enum class ShaderStage : uint8_t {
    VERTEX = 0,
    FRAGMENT = 1,
    COMPUTE = 2
};

// NOTE(review): 2 presumably counts only the graphics stages (VERTEX and FRAGMENT) and
// excludes COMPUTE -- confirm against the users of this constant.
static constexpr size_t PIPELINE_STAGE_COUNT = 2;

//! Bitmask of shader stages; one bit per ShaderStage enumerator.
enum class ShaderStageFlags : uint8_t {
    NONE = 0,
    VERTEX = 0x1,
    FRAGMENT = 0x2,
    COMPUTE = 0x4,
    ALL_SHADER_STAGE_FLAGS = VERTEX | FRAGMENT | COMPUTE
};

/**
 * Tests whether the bit corresponding to a shader stage is set in a stage mask.
 *
 * @param flags mask of shader stages
 * @param type  stage to look for
 * @return true if the bit matching `type` is set in `flags`; false otherwise, including when
 *         `type` does not hold one of the declared ShaderStage enumerators.
 */
static inline constexpr bool hasShaderType(ShaderStageFlags flags, ShaderStage type) noexcept {
    switch (type) {
        case ShaderStage::VERTEX:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::VERTEX));
        case ShaderStage::FRAGMENT:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::FRAGMENT));
        case ShaderStage::COMPUTE:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::COMPUTE));
    }
    // Only reachable when `type` is not a declared enumerator. Without this return the
    // function would flow off its end, which is undefined behavior for a non-void function.
    return false;
}
//! Kind of resource a descriptor refers to.
// NOTE(review): the exact meaning of each enumerator is only implied by its name here --
// confirm against the backends before relying on it.
enum class DescriptorType : uint8_t {
UNIFORM_BUFFER,
SHADER_STORAGE_BUFFER,
SAMPLER,
INPUT_ATTACHMENT,
SAMPLER_EXTERNAL
};
//! Flags attached to a descriptor binding. Bitmask operators are enabled for this enum
//! through a utils::EnableBitMaskOperators specialization.
enum class DescriptorFlags : uint8_t {
NONE = 0x00,
// NOTE(review): presumably marks a buffer descriptor whose offset is supplied when the
// set is bound -- confirm.
DYNAMIC_OFFSET = 0x01
};
using descriptor_set_t = uint8_t;
using descriptor_binding_t = uint8_t;
//! Description of a single binding inside a descriptor set layout.
struct DescriptorSetLayoutBinding {
    DescriptorType type;
    ShaderStageFlags stageFlags;
    descriptor_binding_t binding;
    DescriptorFlags flags = DescriptorFlags::NONE;
    uint16_t count = 0;

    // Two bindings compare equal when type, flags, count and stageFlags all match.
    // NOTE(review): `binding` is not part of the comparison -- confirm this is intentional.
    friend inline bool operator==(
            DescriptorSetLayoutBinding const& lhs,
            DescriptorSetLayoutBinding const& rhs) noexcept {
        const bool sameType = lhs.type == rhs.type;
        const bool sameFlags = lhs.flags == rhs.flags;
        const bool sameCount = lhs.count == rhs.count;
        const bool sameStages = lhs.stageFlags == rhs.stageFlags;
        return sameType && sameFlags && sameCount && sameStages;
    }
};
//! A descriptor set layout, expressed as the list of its bindings.
struct DescriptorSetLayout {
utils::FixedCapacityVector<DescriptorSetLayoutBinding> bindings;
};
/**
* Bitmask for selecting render buffers
*/
@@ -319,20 +249,9 @@ struct Viewport {
int32_t right() const noexcept { return left + int32_t(width); }
//! get the top coordinate in window space of the viewport
int32_t top() const noexcept { return bottom + int32_t(height); }
//! Two viewports are equal when left, bottom, width and height all match.
friend bool operator==(Viewport const& lhs, Viewport const& rhs) noexcept {
// clang can do this branchless with xor/or
return lhs.left == rhs.left && lhs.bottom == rhs.bottom &&
lhs.width == rhs.width && lhs.height == rhs.height;
}
//! Two viewports differ when any of left, bottom, width or height differs.
friend bool operator!=(Viewport const& lhs, Viewport const& rhs) noexcept {
// clang is being dumb and uses branches
// The xor of two fields is non-zero exactly when they differ; or-ing the four xors together
// therefore yields non-zero if any field differs. NOTE(review): presumably written by hand
// because the straightforward comparison compiles to branches with clang -- confirm.
return bool(((lhs.left ^ rhs.left) | (lhs.bottom ^ rhs.bottom)) |
((lhs.width ^ rhs.width) | (lhs.height ^ rhs.height)));
}
};
/**
* Specifies the mapping of the near and far clipping plane to window coordinates.
*/
@@ -351,6 +270,15 @@ enum class FenceStatus : int8_t {
TIMEOUT_EXPIRED = 1, //!< wait()'s timeout expired. The Fence condition is not satisfied.
};
/**
* Status codes for sync objects.
*
* Like FenceStatus, this enum declares an enumerator named ERROR; utils/unwindows.h is
* included by this header because of that name.
*/
enum class SyncStatus : int8_t {
ERROR = -1, //!< An error occurred. The Sync is not signaled.
SIGNALED = 0, //!< The Sync is signaled.
NOT_SIGNALED = 1, //!< The Sync is not signaled yet.
};
static constexpr uint64_t FENCE_WAIT_FOR_EVER = uint64_t(-1);
/**
@@ -440,18 +368,6 @@ enum class SamplerType : uint8_t {
SAMPLER_CUBEMAP_ARRAY, //!< Cube map array texture (feature level 2)
};
/**
 * Returns the name of a SamplerType enumerator as a string literal.
 *
 * @param samplerType value to convert
 * @return the enumerator's name, or "UNKNOWN" if the value matches none of the cases below.
 */
inline const char* stringify(SamplerType samplerType) {
    const char* label = "UNKNOWN";
    switch (samplerType) {
        case SamplerType::SAMPLER_2D:
            label = "SAMPLER_2D";
            break;
        case SamplerType::SAMPLER_2D_ARRAY:
            label = "SAMPLER_2D_ARRAY";
            break;
        case SamplerType::SAMPLER_CUBEMAP:
            label = "SAMPLER_CUBEMAP";
            break;
        case SamplerType::SAMPLER_EXTERNAL:
            label = "SAMPLER_EXTERNAL";
            break;
        case SamplerType::SAMPLER_3D:
            label = "SAMPLER_3D";
            break;
        case SamplerType::SAMPLER_CUBEMAP_ARRAY:
            label = "SAMPLER_CUBEMAP_ARRAY";
            break;
    }
    return label;
}
//! Subpass type
enum class SubpassType : uint8_t {
SUBPASS_INPUT
@@ -777,27 +693,9 @@ enum class TextureUsage : uint16_t {
BLIT_SRC = 0x0040, //!< Texture can be used the source of a blit()
BLIT_DST = 0x0080, //!< Texture can be used the destination of a blit()
PROTECTED = 0x0100, //!< Texture can be used for protected content
DEFAULT = UPLOADABLE | SAMPLEABLE, //!< Default texture usage
ALL_ATTACHMENTS = COLOR_ATTACHMENT | DEPTH_ATTACHMENT | STENCIL_ATTACHMENT | SUBPASS_INPUT, //!< Mask of all attachments
DEFAULT = UPLOADABLE | SAMPLEABLE //!< Default texture usage
};
/**
 * Returns the name of a TextureUsage value as a string literal.
 *
 * Only the values listed below are recognized. TextureUsage is a bitmask, so any other
 * combination of bits yields "UNKNOWN".
 *
 * @param usage value to convert
 * @return the matching name, or "UNKNOWN".
 */
inline const char* stringify(TextureUsage usage) {
    const char* label = "UNKNOWN";
    switch (usage) {
        case TextureUsage::NONE:
            label = "NONE";
            break;
        case TextureUsage::COLOR_ATTACHMENT:
            label = "COLOR_ATTACHMENT";
            break;
        case TextureUsage::DEPTH_ATTACHMENT:
            label = "DEPTH_ATTACHMENT";
            break;
        case TextureUsage::STENCIL_ATTACHMENT:
            label = "STENCIL_ATTACHMENT";
            break;
        case TextureUsage::UPLOADABLE:
            label = "UPLOADABLE";
            break;
        case TextureUsage::SAMPLEABLE:
            label = "SAMPLEABLE";
            break;
        case TextureUsage::SUBPASS_INPUT:
            label = "SUBPASS_INPUT";
            break;
        case TextureUsage::BLIT_SRC:
            label = "BLIT_SRC";
            break;
        case TextureUsage::BLIT_DST:
            label = "BLIT_DST";
            break;
        case TextureUsage::PROTECTED:
            label = "PROTECTED";
            break;
        case TextureUsage::DEFAULT:
            label = "DEFAULT";
            break;
        default:
            break;
    }
    return label;
}
//! Texture swizzle
enum class TextureSwizzle : uint8_t {
SUBSTITUTE_ZERO,
@@ -989,9 +887,6 @@ struct SamplerParams { // NOLINT
struct EqualTo {
bool operator()(SamplerParams lhs, SamplerParams rhs) const noexcept {
assert_invariant(lhs.padding0 == 0);
assert_invariant(lhs.padding1 == 0);
assert_invariant(lhs.padding2 == 0);
auto* pLhs = reinterpret_cast<uint32_t const*>(reinterpret_cast<char const*>(&lhs));
auto* pRhs = reinterpret_cast<uint32_t const*>(reinterpret_cast<char const*>(&rhs));
return *pLhs == *pRhs;
@@ -1000,9 +895,6 @@ struct SamplerParams { // NOLINT
struct LessThan {
    // Orders two SamplerParams by the first 32 bits of each object, read as one uint32_t.
    // The comparison must be `<`: an equality test is not a strict weak ordering and cannot
    // be used as the comparator of an ordered container or of a sort.
    bool operator()(SamplerParams lhs, SamplerParams rhs) const noexcept {
        // NOTE(review): presumably the padding bits must be zero so that they cannot affect
        // the raw 32-bit comparison below -- confirm.
        assert_invariant(lhs.padding0 == 0);
        assert_invariant(lhs.padding1 == 0);
        assert_invariant(lhs.padding2 == 0);
        auto* pLhs = reinterpret_cast<uint32_t const*>(reinterpret_cast<char const*>(&lhs));
        auto* pRhs = reinterpret_cast<uint32_t const*>(reinterpret_cast<char const*>(&rhs));
        return *pLhs < *pRhs;
@@ -1010,12 +902,6 @@ struct SamplerParams { // NOLINT
};
private:
// Equality, delegated to SamplerParams::EqualTo.
friend inline bool operator == (SamplerParams lhs, SamplerParams rhs) noexcept {
return SamplerParams::EqualTo{}(lhs, rhs);
}
// Inequality, defined as the negation of SamplerParams::EqualTo.
friend inline bool operator != (SamplerParams lhs, SamplerParams rhs) noexcept {
return !SamplerParams::EqualTo{}(lhs, rhs);
}
// Ordering, delegated to SamplerParams::LessThan.
friend inline bool operator < (SamplerParams lhs, SamplerParams rhs) noexcept {
return SamplerParams::LessThan{}(lhs, rhs);
}
@@ -1183,6 +1069,32 @@ struct RasterState {
* \privatesection
*/
//! Individual shader stage of a program.
enum class ShaderStage : uint8_t {
    VERTEX = 0,
    FRAGMENT = 1,
    COMPUTE = 2
};

// NOTE(review): 2 presumably counts only the graphics stages (VERTEX and FRAGMENT) and
// excludes COMPUTE -- confirm against the users of this constant.
static constexpr size_t PIPELINE_STAGE_COUNT = 2;

//! Bitmask of shader stages; one bit per ShaderStage enumerator.
enum class ShaderStageFlags : uint8_t {
    NONE = 0,
    VERTEX = 0x1,
    FRAGMENT = 0x2,
    COMPUTE = 0x4,
    ALL_SHADER_STAGE_FLAGS = VERTEX | FRAGMENT | COMPUTE
};

/**
 * Tests whether the bit corresponding to a shader stage is set in a stage mask.
 *
 * @param flags mask of shader stages
 * @param type  stage to look for
 * @return true if the bit matching `type` is set in `flags`; false otherwise, including when
 *         `type` does not hold one of the declared ShaderStage enumerators.
 */
static inline constexpr bool hasShaderType(ShaderStageFlags flags, ShaderStage type) noexcept {
    switch (type) {
        case ShaderStage::VERTEX:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::VERTEX));
        case ShaderStage::FRAGMENT:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::FRAGMENT));
        case ShaderStage::COMPUTE:
            return bool(uint8_t(flags) & uint8_t(ShaderStageFlags::COMPUTE));
    }
    // Only reachable when `type` is not a declared enumerator. Without this return the
    // function would flow off its end, which is undefined behavior for a non-void function.
    return false;
}
/**
* Selects which buffers to clear at the beginning of the render pass, as well as which buffers
* can be discarded at the beginning and end of the render pass.
@@ -1347,8 +1259,6 @@ template<> struct utils::EnableBitMaskOperators<filament::backend::ShaderStageFl
: public std::true_type {};
template<> struct utils::EnableBitMaskOperators<filament::backend::TargetBufferFlags>
: public std::true_type {};
template<> struct utils::EnableBitMaskOperators<filament::backend::DescriptorFlags>
: public std::true_type {};
template<> struct utils::EnableBitMaskOperators<filament::backend::TextureUsage>
: public std::true_type {};
template<> struct utils::EnableBitMaskOperators<filament::backend::StencilFace>

View File

@@ -23,7 +23,6 @@
#include <utils/debug.h>
#include <type_traits> // FIXME: STL headers are not allowed in public headers
#include <utility>
#include <stdint.h>
@@ -42,8 +41,6 @@ struct HwTexture;
struct HwTimerQuery;
struct HwVertexBufferInfo;
struct HwVertexBuffer;
struct HwDescriptorSetLayout;
struct HwDescriptorSet;
/*
* A handle to a backend resource. HandleBase is for internal use only.
@@ -107,18 +104,8 @@ struct Handle : public HandleBase {
Handle(Handle const& rhs) noexcept = default;
Handle(Handle&& rhs) noexcept = default;
// The copy/move assignment operators are spelled out explicitly instead of being defaulted.
// With some compilers (NDK 25.1.8937393 and below, see b/371980551) the defaulted versions do
// not call the parent's operator during the std::move function call
// (https://en.cppreference.com/w/cpp/algorithm/move).
// Copy assignment: forwards to HandleBase::operator=.
Handle& operator=(Handle const& rhs) noexcept {
HandleBase::operator=(rhs);
return *this;
}
// Move assignment: forwards rhs as an rvalue to HandleBase::operator=.
Handle& operator=(Handle&& rhs) noexcept {
HandleBase::operator=(std::move(rhs));
return *this;
}
Handle& operator=(Handle const& rhs) noexcept = default;
Handle& operator=(Handle&& rhs) noexcept = default;
explicit Handle(HandleId id) noexcept : HandleBase(id) { }
@@ -143,21 +130,19 @@ private:
// Types used by the command stream
// (we use this renaming because the macro-system doesn't deal well with "<" and ">")
using BufferObjectHandle = Handle<HwBufferObject>;
using FenceHandle = Handle<HwFence>;
using IndexBufferHandle = Handle<HwIndexBuffer>;
using ProgramHandle = Handle<HwProgram>;
using RenderPrimitiveHandle = Handle<HwRenderPrimitive>;
using RenderTargetHandle = Handle<HwRenderTarget>;
using SamplerGroupHandle = Handle<HwSamplerGroup>;
using StreamHandle = Handle<HwStream>;
using SwapChainHandle = Handle<HwSwapChain>;
using TextureHandle = Handle<HwTexture>;
using TimerQueryHandle = Handle<HwTimerQuery>;
using VertexBufferHandle = Handle<HwVertexBuffer>;
using VertexBufferInfoHandle = Handle<HwVertexBufferInfo>;
using DescriptorSetLayoutHandle = Handle<HwDescriptorSetLayout>;
using DescriptorSetHandle = Handle<HwDescriptorSet>;
using BufferObjectHandle = Handle<HwBufferObject>;
using FenceHandle = Handle<HwFence>;
using IndexBufferHandle = Handle<HwIndexBuffer>;
using ProgramHandle = Handle<HwProgram>;
using RenderPrimitiveHandle = Handle<HwRenderPrimitive>;
using RenderTargetHandle = Handle<HwRenderTarget>;
using SamplerGroupHandle = Handle<HwSamplerGroup>;
using StreamHandle = Handle<HwStream>;
using SwapChainHandle = Handle<HwSwapChain>;
using TextureHandle = Handle<HwTexture>;
using TimerQueryHandle = Handle<HwTimerQuery>;
using VertexBufferHandle = Handle<HwVertexBuffer>;
using VertexBufferInfoHandle = Handle<HwVertexBufferInfo>;
} // namespace filament::backend

View File

@@ -22,23 +22,15 @@
#include <utils/ostream.h>
#include <array>
#include <stdint.h>
namespace filament::backend {
//! \privatesection
// Descriptor-set layout handles used by a pipeline: one slot per descriptor set, with
// MAX_DESCRIPTOR_SET_COUNT slots in total.
struct PipelineLayout {
using SetLayout = std::array<Handle<HwDescriptorSetLayout>, MAX_DESCRIPTOR_SET_COUNT>;
// NOTE(review): the trailing number presumably is the member's size in bytes -- confirm.
SetLayout setLayout; // 16
};
struct PipelineState {
Handle<HwProgram> program; // 4
Handle<HwVertexBufferInfo> vertexBufferInfo; // 4
PipelineLayout pipelineLayout; // 16
RasterState rasterState; // 4
StencilState stencilState; // 12
PolygonOffset polygonOffset; // 8

View File

@@ -68,6 +68,13 @@ public:
*/
size_t handleArenaSize = 0;
/**
* This number of most-recently destroyed textures will be tracked for use-after-free.
* Throws an exception when a texture is freed but still bound to a SamplerGroup and used in
* a draw call. 0 disables completely. Currently only respected by the Metal backend.
*/
size_t textureUseAfterFreePoolSize = 0;
size_t metalUploadBufferSizeBytes = 512 * 1024;
/**
@@ -91,13 +98,6 @@ public:
* Sets the technique for stereoscopic rendering.
*/
StereoscopicType stereoscopicType = StereoscopicType::NONE;
/**
* Assert the native window associated to a SwapChain is valid when calling makeCurrent().
* This is only supported for:
* - PlatformEGLAndroid
*/
bool assertNativeWindowIsValid = false;
};
Platform() noexcept;

View File

@@ -24,11 +24,9 @@
#include <backend/DriverEnums.h>
#include <array>
#include <unordered_map>
#include <tuple>
#include <utility>
#include <variant>
#include <array> // FIXME: STL headers are not allowed in public headers
#include <utility> // FIXME: STL headers are not allowed in public headers
#include <variant> // FIXME: STL headers are not allowed in public headers
#include <stddef.h>
#include <stdint.h>
@@ -42,36 +40,29 @@ public:
static constexpr size_t UNIFORM_BINDING_COUNT = CONFIG_UNIFORM_BINDING_COUNT;
static constexpr size_t SAMPLER_BINDING_COUNT = CONFIG_SAMPLER_BINDING_COUNT;
struct Descriptor {
utils::CString name;
backend::DescriptorType type;
backend::descriptor_binding_t binding;
struct Sampler {
utils::CString name = {}; // name of the sampler in the shader
uint32_t binding = 0; // binding point of the sampler in the shader
};
struct SpecializationConstant {
using Type = std::variant<int32_t, float, bool>;
uint32_t id; // id set in glsl
Type value; // value and type
struct SamplerGroupData {
utils::FixedCapacityVector<Sampler> samplers;
ShaderStageFlags stageFlags = ShaderStageFlags::ALL_SHADER_STAGE_FLAGS;
};
struct Uniform { // For ES2 support
struct Uniform {
utils::CString name; // full qualified name of the uniform field
uint16_t offset; // offset in 'uint32_t' into the uniform buffer
uint8_t size; // >1 for arrays
UniformType type; // uniform type
};
using DescriptorBindingsInfo = utils::FixedCapacityVector<Descriptor>;
using DescriptorSetInfo = std::array<DescriptorBindingsInfo, MAX_DESCRIPTOR_SET_COUNT>;
using SpecializationConstantsInfo = utils::FixedCapacityVector<SpecializationConstant>;
using UniformBlockInfo = std::array<utils::CString, UNIFORM_BINDING_COUNT>;
using UniformInfo = utils::FixedCapacityVector<Uniform>;
using SamplerGroupInfo = std::array<SamplerGroupData, SAMPLER_BINDING_COUNT>;
using ShaderBlob = utils::FixedCapacityVector<uint8_t>;
using ShaderSource = std::array<ShaderBlob, SHADER_TYPE_COUNT>;
using AttributesInfo = utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>>;
using UniformInfo = utils::FixedCapacityVector<Uniform>;
using BindingUniformsInfo = utils::FixedCapacityVector<
std::tuple<uint8_t, utils::CString, Program::UniformInfo>>;
Program() noexcept;
Program(const Program& rhs) = delete;
@@ -88,19 +79,43 @@ public:
Program& diagnostics(utils::CString const& name,
utils::Invocable<utils::io::ostream&(utils::io::ostream& out)>&& logger);
// Sets one of the program's shader (e.g. vertex, fragment)
// sets one of the program's shader (e.g. vertex, fragment)
// string-based shaders are null terminated, consequently the size parameter must include the
// null terminating character.
Program& shader(ShaderStage shader, void const* data, size_t size);
// Sets the language of the shader sources provided with shader() (defaults to ESSL3)
// sets the language of the shader sources provided with shader() (defaults to ESSL3)
Program& shaderLanguage(ShaderLanguage shaderLanguage);
// Descriptor binding (set, binding, type -> shader name) info
Program& descriptorBindings(backend::descriptor_set_t set,
DescriptorBindingsInfo descriptorBindings) noexcept;
// Note: This is only needed for GLES3.0 backends, because the layout(binding=) syntax is
// not permitted in glsl. The backend needs a way to associate a uniform block
// to a binding point.
Program& uniformBlockBindings(
utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> const& uniformBlockBindings) noexcept;
Program& specializationConstants(SpecializationConstantsInfo specConstants) noexcept;
// Note: This is only needed for GLES2.0, this is used to emulate UBO. This function tells
// the program everything it needs to know about the uniforms at a given binding
Program& uniforms(uint32_t index, UniformInfo const& uniforms) noexcept;
// Note: This is only needed for GLES2.0.
Program& attributes(
utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> attributes) noexcept;
// sets the 'bindingPoint' sampler group descriptor for this program.
// 'samplers' can be destroyed after this call.
// This effectively associates a set of (BindingPoints, index) to a texture unit in the shader.
// Or more precisely, what layout(binding=) is set to in GLSL.
Program& setSamplerGroup(size_t bindingPoint, ShaderStageFlags stageFlags,
Sampler const* samplers, size_t count) noexcept;
struct SpecializationConstant {
using Type = std::variant<int32_t, float, bool>;
uint32_t id; // id set in glsl
Type value; // value and type
};
Program& specializationConstants(
utils::FixedCapacityVector<SpecializationConstant> specConstants) noexcept;
struct PushConstant {
utils::CString name;
@@ -114,40 +129,33 @@ public:
Program& multiview(bool multiview) noexcept;
// For ES2 support only...
Program& uniforms(uint32_t index, utils::CString name, UniformInfo uniforms) noexcept;
Program& attributes(AttributesInfo attributes) noexcept;
//
// Getters for program construction...
//
ShaderSource const& getShadersSource() const noexcept { return mShadersSource; }
ShaderSource& getShadersSource() noexcept { return mShadersSource; }
UniformBlockInfo const& getUniformBlockBindings() const noexcept { return mUniformBlocks; }
UniformBlockInfo& getUniformBlockBindings() noexcept { return mUniformBlocks; }
SamplerGroupInfo const& getSamplerGroupInfo() const { return mSamplerGroups; }
SamplerGroupInfo& getSamplerGroupInfo() { return mSamplerGroups; }
auto const& getBindingUniformInfo() const { return mBindingUniformInfo; }
auto& getBindingUniformInfo() { return mBindingUniformInfo; }
auto const& getAttributes() const { return mAttributes; }
auto& getAttributes() { return mAttributes; }
utils::CString const& getName() const noexcept { return mName; }
utils::CString& getName() noexcept { return mName; }
auto const& getShaderLanguage() const { return mShaderLanguage; }
uint64_t getCacheId() const noexcept { return mCacheId; }
bool isMultiview() const noexcept { return mMultiview; }
CompilerPriorityQueue getPriorityQueue() const noexcept { return mPriorityQueue; }
SpecializationConstantsInfo const& getSpecializationConstants() const noexcept {
utils::FixedCapacityVector<SpecializationConstant> const& getSpecializationConstants() const noexcept {
return mSpecializationConstants;
}
SpecializationConstantsInfo& getSpecializationConstants() noexcept {
utils::FixedCapacityVector<SpecializationConstant>& getSpecializationConstants() noexcept {
return mSpecializationConstants;
}
DescriptorSetInfo& getDescriptorBindings() noexcept {
return mDescriptorBindings;
}
utils::FixedCapacityVector<PushConstant> const& getPushConstants(
ShaderStage stage) const noexcept {
return mPushConstants[static_cast<uint8_t>(stage)];
@@ -157,29 +165,27 @@ public:
return mPushConstants[static_cast<uint8_t>(stage)];
}
auto const& getBindingUniformInfo() const { return mBindingUniformsInfo; }
auto& getBindingUniformInfo() { return mBindingUniformsInfo; }
uint64_t getCacheId() const noexcept { return mCacheId; }
auto const& getAttributes() const { return mAttributes; }
auto& getAttributes() { return mAttributes; }
bool isMultiview() const noexcept { return mMultiview; }
CompilerPriorityQueue getPriorityQueue() const noexcept { return mPriorityQueue; }
private:
friend utils::io::ostream& operator<<(utils::io::ostream& out, const Program& builder);
UniformBlockInfo mUniformBlocks = {};
SamplerGroupInfo mSamplerGroups = {};
ShaderSource mShadersSource;
ShaderLanguage mShaderLanguage = ShaderLanguage::ESSL3;
utils::CString mName;
uint64_t mCacheId{};
CompilerPriorityQueue mPriorityQueue = CompilerPriorityQueue::HIGH;
utils::Invocable<utils::io::ostream&(utils::io::ostream& out)> mLogger;
SpecializationConstantsInfo mSpecializationConstants;
utils::FixedCapacityVector<SpecializationConstant> mSpecializationConstants;
std::array<utils::FixedCapacityVector<PushConstant>, SHADER_TYPE_COUNT> mPushConstants;
DescriptorSetInfo mDescriptorBindings;
// For ES2 support only
AttributesInfo mAttributes;
BindingUniformsInfo mBindingUniformsInfo;
utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> mAttributes;
std::array<UniformInfo, Program::UNIFORM_BINDING_COUNT> mBindingUniformInfo;
CompilerPriorityQueue mPriorityQueue = CompilerPriorityQueue::HIGH;
// Indicates that the current engine was initialized with multiview stereo, and that the variant
// for this program contains the STE flag. The OpenGL shader compiler refers to this later to
// determine whether shader code replacement for num_views should be performed.

View File

@@ -30,6 +30,10 @@ namespace filament::backend {
struct TargetBufferInfo {
// note: the parameters of this constructor are not in the order of this structure's fields
TargetBufferInfo(Handle<HwTexture> handle, uint8_t level, uint16_t layer, uint8_t baseViewIndex) noexcept
: handle(handle), baseViewIndex(baseViewIndex), level(level), layer(layer) {
}
TargetBufferInfo(Handle<HwTexture> handle, uint8_t level, uint16_t layer) noexcept
: handle(handle), level(level), layer(layer) {
}
@@ -47,15 +51,14 @@ struct TargetBufferInfo {
// texture to be used as render target
Handle<HwTexture> handle;
// Starting layer index for multiview. This value is only used when the `layerCount` for the
// render target is greater than 1.
uint8_t baseViewIndex = 0;
// level to be used
uint8_t level = 0;
// - For cubemap textures, this indicates the face of the cubemap (see TextureCubemapFace for
// the face->layer mapping).
// - For 2d array, cubemap array, and 3d textures, this indicates an index of a single layer of
// them.
// - For multiview textures (i.e., layerCount for the RenderTarget is greater than 1), this
// indicates a starting layer index of the current 2d array texture for multiview.
// For cubemaps and 3D textures. See TextureCubemapFace for the face->layer mapping
uint16_t layer = 0;
};
@@ -100,7 +103,7 @@ public:
// this is here for backward compatibility
MRT(Handle<HwTexture> handle, uint8_t level, uint16_t layer) noexcept
: mInfos{{ handle, level, layer }} {
: mInfos{{ handle, level, layer, 0 }} {
}
};

View File

@@ -89,11 +89,6 @@ protected:
*/
AcquiredImage transformAcquiredImage(AcquiredImage source) noexcept override;
protected:
bool makeCurrent(ContextType type,
SwapChain* drawSwapChain,
SwapChain* readSwapChain) noexcept override;
private:
struct InitializeJvmForPerformanceManagerIfNeeded {
InitializeJvmForPerformanceManagerIfNeeded();
@@ -107,10 +102,6 @@ private:
using clock = std::chrono::high_resolution_clock;
clock::time_point mStartTimeOfActualWork;
void* mNativeWindowLib = nullptr;
int32_t (*ANativeWindow_getBuffersDefaultDataSpace)(ANativeWindow* window) = nullptr;
bool mAssertNativeWindowIsValid = false;
};
} // namespace filament::backend

View File

@@ -1,64 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_OPENGL_OPENGL_PLATFORM_OSMESA_H
#define TNT_FILAMENT_BACKEND_OPENGL_OPENGL_PLATFORM_OSMESA_H
#include <stdint.h>
#include "bluegl/BlueGL.h"
#include <osmesa.h>
#include <backend/platforms/OpenGLPlatform.h>
#include <backend/DriverEnums.h>
namespace filament::backend {
/**
* A concrete implementation of OpenGLPlatform that uses OSMesa, which is an offscreen
* context that can be used in conjunction with Mesa for software rasterization.
* See https://docs.mesa3d.org/osmesa.html for more information.
*/
class PlatformOSMesa : public OpenGLPlatform {
protected:
    // --------------------------------------------------------------------------------------------
    // Platform Interface

    // Creates the backend driver for this platform (implemented out of line).
    Driver* createDriver(void* sharedGLContext, const DriverConfig& driverConfig) noexcept override;

    // This platform always reports 0 as its OS version.
    int getOSVersion() const noexcept final override { return 0; }

    // --------------------------------------------------------------------------------------------
    // OpenGLPlatform Interface

    void terminate() noexcept override;

    // Swap chain created from a native window pointer.
    SwapChain* createSwapChain(void* nativewindow, uint64_t flags) noexcept override;

    // Swap chain created from explicit dimensions rather than a native window.
    SwapChain* createSwapChain(uint32_t width, uint32_t height, uint64_t flags) noexcept override;

    void destroySwapChain(SwapChain* swapChain) noexcept override;

    bool makeCurrent(ContextType type, SwapChain* drawSwapChain,
            SwapChain* readSwapChain) noexcept override;

    void commit(SwapChain* swapChain) noexcept override;

private:
    // OSMesa context handle. Explicitly initialized to null (like mOsMesaApi below) so that the
    // member never holds an indeterminate value before a context is assigned to it.
    // NOTE(review): presumably assigned in createDriver() and released in terminate(); confirm
    // against the implementation file.
    OSMesaContext mContext = nullptr;

    // Opaque pointer owned by the implementation.
    // NOTE(review): presumably the table of resolved OSMesa entry points; confirm in the .cpp.
    void* mOsMesaApi = nullptr;
};
} // namespace filament::backend
#endif // TNT_FILAMENT_BACKEND_OPENGL_OPENGL_PLATFORM_OSMESA_H

View File

@@ -88,7 +88,6 @@ public:
VkFormat colorFormat = VK_FORMAT_UNDEFINED;
VkFormat depthFormat = VK_FORMAT_UNDEFINED;
VkExtent2D extent = {0, 0};
bool isProtected = false;
};
struct ImageSyncData {
@@ -200,13 +199,6 @@ public:
*/
virtual bool hasResized(SwapChainPtr handle);
/**
* Check if the surface is protected.
* @param handle The handle returned by createSwapChain()
* @return Whether the swapchain is protected
*/
virtual bool isProtected(SwapChainPtr handle);
/**
* Carry out a recreation of the swapchain.
* @param handle The handle returned by createSwapChain()
@@ -275,23 +267,6 @@ public:
*/
VkQueue getGraphicsQueue() const noexcept;
/**
* @return The family index of the protected graphics queue selected for the
* Vulkan backend.
*/
uint32_t getProtectedGraphicsQueueFamilyIndex() const noexcept;
/**
* @return The index of the protected graphics queue (if there are multiple
* graphics queues) selected for the Vulkan backend.
*/
uint32_t getProtectedGraphicsQueueIndex() const noexcept;
/**
* @return The protected queue that was selected for the Vulkan backend.
*/
VkQueue getProtectedGraphicsQueue() const noexcept;
private:
static ExtensionSet getSwapchainInstanceExtensions();

View File

@@ -18,7 +18,6 @@
#define TNT_FILAMENT_BACKEND_PRIVATE_DRIVER_H
#include <backend/CallbackHandler.h>
#include <backend/DescriptorSetOffsetArray.h>
#include <backend/DriverApiForward.h>
#include <backend/DriverEnums.h>
#include <backend/Handle.h>

View File

@@ -139,8 +139,7 @@ DECL_DRIVER_API_N(beginFrame,
DECL_DRIVER_API_N(setFrameScheduledCallback,
backend::SwapChainHandle, sch,
backend::CallbackHandler*, handler,
backend::FrameScheduledCallback&&, callback,
uint64_t, flags)
backend::FrameScheduledCallback&&, callback)
DECL_DRIVER_API_N(setFrameCompletedCallback,
backend::SwapChainHandle, sch,
@@ -163,10 +162,6 @@ DECL_DRIVER_API_0(finish)
// reset state tracking, if the driver does any state tracking (e.g. GL)
DECL_DRIVER_API_0(resetState)
DECL_DRIVER_API_N(setDebugTag,
backend::HandleBase::HandleId, handleId,
utils::CString, tag)
/*
* Creating driver objects
* -----------------------
@@ -201,33 +196,20 @@ DECL_DRIVER_API_R_N(backend::TextureHandle, createTexture,
uint32_t, depth,
backend::TextureUsage, usage)
DECL_DRIVER_API_R_N(backend::TextureHandle, createTextureView,
backend::TextureHandle, texture,
uint8_t, baseLevel,
uint8_t, levelCount)
DECL_DRIVER_API_R_N(backend::TextureHandle, createTextureViewSwizzle,
backend::TextureHandle, texture,
DECL_DRIVER_API_R_N(backend::TextureHandle, createTextureSwizzled,
backend::SamplerType, target,
uint8_t, levels,
backend::TextureFormat, format,
uint8_t, samples,
uint32_t, width,
uint32_t, height,
uint32_t, depth,
backend::TextureUsage, usage,
backend::TextureSwizzle, r,
backend::TextureSwizzle, g,
backend::TextureSwizzle, b,
backend::TextureSwizzle, a)
DECL_DRIVER_API_R_N(backend::TextureHandle, createTextureExternalImage,
backend::TextureFormat, format,
uint32_t, width,
uint32_t, height,
backend::TextureUsage, usage,
void*, image)
DECL_DRIVER_API_R_N(backend::TextureHandle, createTextureExternalImagePlane,
backend::TextureFormat, format,
uint32_t, width,
uint32_t, height,
backend::TextureUsage, usage,
void*, image,
uint32_t, plane)
DECL_DRIVER_API_R_N(backend::TextureHandle, importTexture,
intptr_t, id,
backend::SamplerType, target,
@@ -239,6 +221,9 @@ DECL_DRIVER_API_R_N(backend::TextureHandle, importTexture,
uint32_t, depth,
backend::TextureUsage, usage)
DECL_DRIVER_API_R_N(backend::SamplerGroupHandle, createSamplerGroup,
uint32_t, size, utils::FixedSizeString<32>, debugName)
DECL_DRIVER_API_R_N(backend::RenderPrimitiveHandle, createRenderPrimitive,
backend::VertexBufferHandle, vbh,
backend::IndexBufferHandle, ibh,
@@ -272,53 +257,25 @@ DECL_DRIVER_API_R_N(backend::SwapChainHandle, createSwapChainHeadless,
DECL_DRIVER_API_R_0(backend::TimerQueryHandle, createTimerQuery)
DECL_DRIVER_API_R_N(backend::DescriptorSetLayoutHandle, createDescriptorSetLayout,
backend::DescriptorSetLayout&&, info)
DECL_DRIVER_API_R_N(backend::DescriptorSetHandle, createDescriptorSet,
backend::DescriptorSetLayoutHandle, dslh)
DECL_DRIVER_API_N(updateDescriptorSetBuffer,
backend::DescriptorSetHandle, dsh,
backend::descriptor_binding_t, binding,
backend::BufferObjectHandle, boh,
uint32_t, offset,
uint32_t, size
)
DECL_DRIVER_API_N(updateDescriptorSetTexture,
backend::DescriptorSetHandle, dsh,
backend::descriptor_binding_t, binding,
backend::TextureHandle, th,
SamplerParams, params
)
DECL_DRIVER_API_N(bindDescriptorSet,
backend::DescriptorSetHandle, dsh,
backend::descriptor_set_t, set,
backend::DescriptorSetOffsetArray&&, offsets
)
/*
* Destroying driver objects
* -------------------------
*/
DECL_DRIVER_API_N(destroyVertexBuffer, backend::VertexBufferHandle, vbh)
DECL_DRIVER_API_N(destroyVertexBufferInfo, backend::VertexBufferInfoHandle, vbih)
DECL_DRIVER_API_N(destroyIndexBuffer, backend::IndexBufferHandle, ibh)
DECL_DRIVER_API_N(destroyBufferObject, backend::BufferObjectHandle, ibh)
DECL_DRIVER_API_N(destroyRenderPrimitive, backend::RenderPrimitiveHandle, rph)
DECL_DRIVER_API_N(destroyProgram, backend::ProgramHandle, ph)
DECL_DRIVER_API_N(destroyTexture, backend::TextureHandle, th)
DECL_DRIVER_API_N(destroyRenderTarget, backend::RenderTargetHandle, rth)
DECL_DRIVER_API_N(destroySwapChain, backend::SwapChainHandle, sch)
DECL_DRIVER_API_N(destroyStream, backend::StreamHandle, sh)
DECL_DRIVER_API_N(destroyTimerQuery, backend::TimerQueryHandle, sh)
DECL_DRIVER_API_N(destroyFence, backend::FenceHandle, fh)
DECL_DRIVER_API_N(destroyDescriptorSetLayout, backend::DescriptorSetLayoutHandle, dslh)
DECL_DRIVER_API_N(destroyDescriptorSet, backend::DescriptorSetHandle, dsh)
DECL_DRIVER_API_N(destroyVertexBuffer, backend::VertexBufferHandle, vbh)
DECL_DRIVER_API_N(destroyVertexBufferInfo,backend::VertexBufferInfoHandle, vbih)
DECL_DRIVER_API_N(destroyIndexBuffer, backend::IndexBufferHandle, ibh)
DECL_DRIVER_API_N(destroyBufferObject, backend::BufferObjectHandle, ibh)
DECL_DRIVER_API_N(destroyRenderPrimitive, backend::RenderPrimitiveHandle, rph)
DECL_DRIVER_API_N(destroyProgram, backend::ProgramHandle, ph)
DECL_DRIVER_API_N(destroySamplerGroup, backend::SamplerGroupHandle, sbh)
DECL_DRIVER_API_N(destroyTexture, backend::TextureHandle, th)
DECL_DRIVER_API_N(destroyRenderTarget, backend::RenderTargetHandle, rth)
DECL_DRIVER_API_N(destroySwapChain, backend::SwapChainHandle, sch)
DECL_DRIVER_API_N(destroyStream, backend::StreamHandle, sh)
DECL_DRIVER_API_N(destroyTimerQuery, backend::TimerQueryHandle, sh)
DECL_DRIVER_API_N(destroyFence, backend::FenceHandle, fh)
/*
* Synchronous APIs
@@ -385,6 +342,15 @@ DECL_DRIVER_API_N(updateBufferObjectUnsynchronized,
DECL_DRIVER_API_N(resetBufferObject,
backend::BufferObjectHandle, ibh)
DECL_DRIVER_API_N(updateSamplerGroup,
backend::SamplerGroupHandle, ubh,
backend::BufferDescriptor&&, data)
DECL_DRIVER_API_N(setMinMaxLevels,
backend::TextureHandle, th,
uint32_t, minLevel,
uint32_t, maxLevel)
DECL_DRIVER_API_N(update3DImage,
backend::TextureHandle, th,
uint32_t, level,
@@ -399,12 +365,10 @@ DECL_DRIVER_API_N(update3DImage,
DECL_DRIVER_API_N(generateMipmaps,
backend::TextureHandle, th)
// Deprecated
DECL_DRIVER_API_N(setExternalImage,
backend::TextureHandle, th,
void*, image)
// Deprecated
DECL_DRIVER_API_N(setExternalImagePlane,
backend::TextureHandle, th,
void*, image,
@@ -451,16 +415,37 @@ DECL_DRIVER_API_N(commit,
* -----------------------
*/
DECL_DRIVER_API_N(bindUniformBuffer,
uint32_t, index,
backend::BufferObjectHandle, ubh)
DECL_DRIVER_API_N(bindBufferRange,
BufferObjectBinding, bindingType,
uint32_t, index,
backend::BufferObjectHandle, ubh,
uint32_t, offset,
uint32_t, size)
DECL_DRIVER_API_N(unbindBuffer,
BufferObjectBinding, bindingType,
uint32_t, index)
DECL_DRIVER_API_N(bindSamplers,
uint32_t, index,
backend::SamplerGroupHandle, sbh)
DECL_DRIVER_API_N(setPushConstant,
backend::ShaderStage, stage,
uint8_t, index,
backend::PushConstantVariant, value)
DECL_DRIVER_API_N(insertEventMarker,
const char*, string)
const char*, string,
uint32_t, len = 0)
DECL_DRIVER_API_N(pushGroupMarker,
const char*, string)
const char*, string,
uint32_t, len = 0)
DECL_DRIVER_API_0(popGroupMarker)

View File

@@ -18,17 +18,6 @@
#define TNT_FILAMENT_BACKEND_PRIVATE_DRIVERAPI_H
#include "backend/DriverApiForward.h"
#include "private/backend/CommandStream.h"
#include <stddef.h>
namespace filament::backend {
// Free-function wrapper around DriverApi::allocate(): forwards `size` and `alignment` to
// `driver.allocate()` and returns the pointer it produces, unchanged.
inline void* allocateFromCommandStream(DriverApi& driver, size_t size, size_t alignment) noexcept {
return driver.allocate(size, alignment);
}
} // namespace filament::backend
#endif // TNT_FILAMENT_BACKEND_PRIVATE_DRIVERAPI_H

View File

@@ -20,12 +20,11 @@
#include <backend/Handle.h>
#include <utils/Allocator.h>
#include <utils/CString.h>
#include <utils/Log.h>
#include <utils/Panic.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/ostream.h>
#include <utils/Panic.h>
#include <tsl/robin_map.h>
@@ -38,7 +37,7 @@
#include <stddef.h>
#include <stdint.h>
#define HandleAllocatorGL HandleAllocator<32, 96, 136> // ~4520 / pool / MiB
#define HandleAllocatorGL HandleAllocator<32, 64, 136> // ~4520 / pool / MiB
#define HandleAllocatorVK HandleAllocator<64, 160, 312> // ~1820 / pool / MiB
#define HandleAllocatorMTL HandleAllocator<32, 64, 552> // ~1660 / pool / MiB
@@ -169,31 +168,13 @@ public:
auto [p, tag] = handleToPointer(handle.getId());
if (isPoolHandle(handle.getId())) {
// check for pool handle use-after-free
// check for use after free
if (UTILS_UNLIKELY(!mUseAfterFreeCheckDisabled)) {
uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
auto const pNode = static_cast<typename Allocator::Node*>(p);
uint8_t const expectedAge = pNode[-1].age;
// getHandleTag() is only called if the check fails.
FILAMENT_CHECK_POSTCONDITION(expectedAge == age)
<< "use-after-free of Handle with id=" << handle.getId()
<< ", tag=" << getHandleTag(handle.getId()).c_str_safe();
}
} else {
// check for heap handle use-after-free
if (UTILS_UNLIKELY(!mUseAfterFreeCheckDisabled)) {
uint8_t const index = (handle.getId() & HANDLE_INDEX_MASK);
// if we've already handed out this handle index before, it's definitely a
// use-after-free, otherwise it's probably just a corrupted handle
if (index < mId) {
FILAMENT_CHECK_POSTCONDITION(p != nullptr)
<< "use-after-free of heap Handle with id=" << handle.getId()
<< ", tag=" << getHandleTag(handle.getId()).c_str_safe();
} else {
FILAMENT_CHECK_POSTCONDITION(p != nullptr)
<< "corrupted heap Handle with id=" << handle.getId()
<< ", tag=" << getHandleTag(handle.getId()).c_str_safe();
}
FILAMENT_CHECK_POSTCONDITION(expectedAge == age) <<
"use-after-free of Handle with id=" << handle.getId();
}
}
@@ -202,18 +183,14 @@ public:
template<typename B>
bool is_valid(Handle<B>& handle) {
if (!handle) {
// null handles are invalid
return false;
}
auto [p, tag] = handleToPointer(handle.getId());
if (isPoolHandle(handle.getId())) {
if (handle && isPoolHandle(handle.getId())) {
auto [p, tag] = handleToPointer(handle.getId());
uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
auto const pNode = static_cast<typename Allocator::Node*>(p);
uint8_t const expectedAge = pNode[-1].age;
return expectedAge == age;
}
return p != nullptr;
return true;
}
template<typename Dp, typename B>
@@ -224,29 +201,6 @@ public:
return handle_cast<Dp>(const_cast<Handle<B>&>(handle));
}
void associateTagToHandle(HandleBase::HandleId id, utils::CString&& tag) noexcept {
    // TODO: only pool handles are checked for use-after-free at the moment, so tags are only
    // recorded for those; anything else is ignored.
    if (!isPoolHandle(id)) {
        return;
    }

    // Bits of the age that are NOT part of the debug tag. Clearing them truncates the age down
    // to the debug tag, which (together with the remaining bits of the id) forms the map key.
    constexpr uint32_t droppedAgeBits = HANDLE_DEBUG_TAG_MASK ^ HANDLE_AGE_MASK;
    uint32_t const key = id & ~droppedAgeBits;

    // The map insertion is the expensive part of this call. A custom allocator could
    // potentially be used here in the future.
    mDebugTags[key] = std::move(tag);
}
utils::CString getHandleTag(HandleBase::HandleId id) const noexcept {
    // Only pool handles are looked up; every other case falls through to the placeholder.
    if (isPoolHandle(id)) {
        // Same key derivation as when the tag was stored: the age is truncated to the debug tag.
        uint32_t const key = id & ~(HANDLE_DEBUG_TAG_MASK ^ HANDLE_AGE_MASK);
        auto const found = mDebugTags.find(key);
        if (found != mDebugTags.end()) {
            return found->second;
        }
    }
    return "(no tag)";
}
private:
template<typename D>
@@ -364,24 +318,12 @@ private:
}
}
// number of bits allotted to the handle's age (currently 4 max)
static constexpr uint32_t HANDLE_AGE_BIT_COUNT = 4;
// number of bits allotted to the handle's debug tag (HANDLE_AGE_BIT_COUNT max)
static constexpr uint32_t HANDLE_DEBUG_TAG_BIT_COUNT = 2;
// bit shift for both the age and debug tag
static constexpr uint32_t HANDLE_AGE_SHIFT = 27;
// mask for the heap (vs pool) flag
static constexpr uint32_t HANDLE_HEAP_FLAG = 0x80000000u;
// mask for the age
static constexpr uint32_t HANDLE_AGE_MASK =
((1 << HANDLE_AGE_BIT_COUNT) - 1) << HANDLE_AGE_SHIFT;
// mask for the debug tag
static constexpr uint32_t HANDLE_DEBUG_TAG_MASK =
((1 << HANDLE_DEBUG_TAG_BIT_COUNT) - 1) << HANDLE_AGE_SHIFT;
// mask for the index
static constexpr uint32_t HANDLE_INDEX_MASK = 0x07FFFFFFu;
static_assert(HANDLE_DEBUG_TAG_BIT_COUNT <= HANDLE_AGE_BIT_COUNT);
// we handle a 4-bit age per address
static constexpr uint32_t HANDLE_HEAP_FLAG = 0x80000000u; // pool vs heap handle
static constexpr uint32_t HANDLE_AGE_MASK = 0x78000000u; // handle's age
static constexpr uint32_t HANDLE_INDEX_MASK = 0x07FFFFFFu; // handle index
static constexpr uint32_t HANDLE_TAG_MASK = HANDLE_AGE_MASK;
static constexpr uint32_t HANDLE_AGE_SHIFT = 27;
static bool isPoolHandle(HandleBase::HandleId id) noexcept {
return (id & HANDLE_HEAP_FLAG) == 0u;
@@ -396,7 +338,7 @@ private:
// a non-pool handle.
if (UTILS_LIKELY(isPoolHandle(id))) {
char* const base = (char*)mHandleArena.getArea().begin();
uint32_t const tag = id & HANDLE_AGE_MASK;
uint32_t const tag = id & HANDLE_TAG_MASK;
size_t const offset = (id & HANDLE_INDEX_MASK) * Allocator::getAlignment();
return { static_cast<void*>(base + offset), tag };
}
@@ -411,7 +353,7 @@ private:
size_t const offset = (char*)p - base;
assert_invariant((offset % Allocator::getAlignment()) == 0);
auto id = HandleBase::HandleId(offset / Allocator::getAlignment());
id |= tag & HANDLE_AGE_MASK;
id |= tag & HANDLE_TAG_MASK;
assert_invariant((id & HANDLE_HEAP_FLAG) == 0);
return id;
}
@@ -421,7 +363,6 @@ private:
// Below is only used when running out of space in the HandleArena
mutable utils::Mutex mLock;
tsl::robin_map<HandleBase::HandleId, void*> mOverflowMap;
tsl::robin_map<HandleBase::HandleId, utils::CString> mDebugTags;
HandleBase::HandleId mId = 0;
bool mUseAfterFreeCheckDisabled = false;
};

View File

@@ -24,7 +24,7 @@
#include <utils/debug.h>
#include <utils/ostream.h>
#if !defined(WIN32) && !defined(__EMSCRIPTEN__)
#if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS)
# include <sys/mman.h>
# include <unistd.h>
# define HAS_MMAP 1

View File

@@ -20,16 +20,11 @@
#include <utils/CallStack.h>
#endif
#include <utils/compiler.h>
#include <utils/Log.h>
#include <utils/ostream.h>
#include <utils/Profiler.h>
#include <utils/Systrace.h>
#include <cstddef>
#include <functional>
#include <string>
#include <utility>
#ifdef __ANDROID__
#include <sys/system_properties.h>
@@ -79,8 +74,8 @@ CommandStream::CommandStream(Driver& driver, CircularBuffer& buffer) noexcept
}
void CommandStream::execute(void* buffer) {
// NOTE: we can't use SYSTRACE_CALL() or similar here because execute() below also uses
// systrace BEGIN/END, and the END is not guaranteed to happen in this scope.
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
Profiler profiler;
@@ -105,7 +100,6 @@ void CommandStream::execute(void* buffer) {
// we want to remove all this when tracing is completely disabled
profiler.stop();
UTILS_UNUSED Profiler::Counters const counters = profiler.readCounters();
SYSTRACE_CONTEXT();
SYSTRACE_VALUE32("GLThread (I)", counters.getInstructions());
SYSTRACE_VALUE32("GLThread (C)", counters.getCpuCycles());
SYSTRACE_VALUE32("GLThread (CPI x10)", counters.getCPI() * 10);

View File

@@ -101,14 +101,6 @@ struct HwProgram : public HwBase {
HwProgram() noexcept = default;
};
struct HwDescriptorSetLayout : public HwBase {
HwDescriptorSetLayout() noexcept = default;
};
struct HwDescriptorSet : public HwBase {
HwDescriptorSet() noexcept = default;
};
struct HwSamplerGroup : public HwBase {
HwSamplerGroup() noexcept = default;
};

View File

@@ -80,9 +80,6 @@ HandleAllocator<P0, P1, P2>::HandleAllocator(const char* name, size_t size,
bool disableUseAfterFreeCheck) noexcept
: mHandleArena(name, size, disableUseAfterFreeCheck),
mUseAfterFreeCheckDisabled(disableUseAfterFreeCheck) {
// Reserve initial space for debug tags. This prevents excessive calls to malloc when the first
// few tags are set.
mDebugTags.reserve(512);
}
template <size_t P0, size_t P1, size_t P2>

View File

@@ -41,10 +41,6 @@
#if defined(FILAMENT_SUPPORTS_OPENGL) && !defined(FILAMENT_USE_EXTERNAL_GLES3)
#include "backend/platforms/PlatformEGLHeadless.h"
#endif
#elif defined(FILAMENT_SUPPORTS_OSMESA)
#if defined(FILAMENT_SUPPORTS_OPENGL) && !defined(FILAMENT_USE_EXTERNAL_GLES3)
#include "backend/platforms/PlatformOSMesa.h"
#endif
#endif
#elif defined(WIN32)
#if defined(FILAMENT_SUPPORTS_OPENGL) && !defined(FILAMENT_USE_EXTERNAL_GLES3)
@@ -128,8 +124,6 @@ Platform* PlatformFactory::create(Backend* backend) noexcept {
return new PlatformGLX();
#elif defined(FILAMENT_SUPPORTS_EGL_ON_LINUX)
return new PlatformEGLHeadless();
#elif defined(FILAMENT_SUPPORTS_OSMESA)
return new PlatformOSMesa();
#else
return nullptr;
#endif

View File

@@ -14,18 +14,7 @@
* limitations under the License.
*/
#include <backend/Program.h>
#include <backend/DriverEnums.h>
#include <utils/debug.h>
#include <utils/CString.h>
#include <utils/ostream.h>
#include <utils/Invocable.h>
#include <utility>
#include <stddef.h>
#include <stdint.h>
#include "backend/Program.h"
namespace filament::backend {
@@ -63,24 +52,41 @@ Program& Program::shaderLanguage(ShaderLanguage shaderLanguage) {
return *this;
}
Program& Program::descriptorBindings(backend::descriptor_set_t set,
DescriptorBindingsInfo descriptorBindings) noexcept {
mDescriptorBindings[set] = std::move(descriptorBindings);
Program& Program::uniformBlockBindings(
FixedCapacityVector<std::pair<utils::CString, uint8_t>> const& uniformBlockBindings) noexcept {
for (auto const& item : uniformBlockBindings) {
assert_invariant(item.second < UNIFORM_BINDING_COUNT);
mUniformBlocks[item.second] = item.first;
}
return *this;
}
Program& Program::uniforms(uint32_t index, utils::CString name, UniformInfo uniforms) noexcept {
mBindingUniformsInfo.reserve(mBindingUniformsInfo.capacity() + 1);
mBindingUniformsInfo.emplace_back(index, std::move(name), std::move(uniforms));
Program& Program::uniforms(uint32_t index, UniformInfo const& uniforms) noexcept {
assert_invariant(index < UNIFORM_BINDING_COUNT);
mBindingUniformInfo[index] = uniforms;
return *this;
}
Program& Program::attributes(AttributesInfo attributes) noexcept {
Program& Program::attributes(
utils::FixedCapacityVector<std::pair<utils::CString, uint8_t>> attributes) noexcept {
mAttributes = std::move(attributes);
return *this;
}
Program& Program::specializationConstants(SpecializationConstantsInfo specConstants) noexcept {
// Records the sampler group bound at `bindingPoint`: stores the shader stages that use it and
// copies `count` entries from `samplers` into the group's sampler list. Returns *this so calls
// can be chained.
Program& Program::setSamplerGroup(size_t bindingPoint, ShaderStageFlags stageFlags,
        const Program::Sampler* samplers, size_t count) noexcept {
    mSamplerGroups[bindingPoint].stageFlags = stageFlags;

    // Size the destination to exactly `count` entries before overwriting them.
    mSamplerGroups[bindingPoint].samplers.reserve(count);
    mSamplerGroups[bindingPoint].samplers.resize(count);
    std::copy_n(samplers, count, mSamplerGroups[bindingPoint].samplers.data());

    return *this;
}
Program& Program::specializationConstants(
FixedCapacityVector<SpecializationConstant> specConstants) noexcept {
mSpecializationConstants = std::move(specConstants);
return *this;
}

View File

@@ -1,28 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_SYSTRACEPROFILE_H
#define TNT_FILAMENT_BACKEND_SYSTRACEPROFILE_H
#include <utils/Systrace.h>
#define PROFILE_SCOPE(marker) SYSTRACE_NAME(marker)
#define PROFILE_NAME_BEGINFRAME "backend::beginFrame"
#define PROFILE_NAME_ENDFRAME "backend::endFrame"
#endif // TNT_FILAMENT_BACKEND_SYSTRACEPROFILE_H

View File

@@ -73,11 +73,10 @@ public:
enum class Type {
NONE = 0,
GENERIC = 1,
RING = 2, // deprecated
RING = 2,
STAGING = 3,
DESCRIPTOR_SET = 4,
};
static constexpr size_t TypeCount = 4;
static constexpr size_t TypeCount = 3;
static constexpr auto toIndex(Type t) {
assert_invariant(t != Type::NONE);
@@ -89,8 +88,6 @@ public:
return 1;
case Type::STAGING:
return 2;
case Type::DESCRIPTOR_SET:
return 3;
}
}
@@ -163,8 +160,6 @@ public:
size_t size, bool forceGpuBuffer = false);
~MetalBuffer();
[[nodiscard]] bool wasAllocationSuccessful() const noexcept { return mBuffer || mCpuBuffer; }
MetalBuffer(const MetalBuffer& rhs) = delete;
MetalBuffer& operator=(const MetalBuffer& rhs) = delete;
@@ -174,10 +169,8 @@ public:
* Update the buffer with data inside src. Potentially allocates a new buffer allocation to hold
* the bytes which will be released when the current frame is finished.
*/
using TagResolver = utils::Invocable<const char*(void)>;
void copyIntoBuffer(void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag);
void copyIntoBufferUnsynchronized(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag);
void copyIntoBuffer(void* src, size_t size, size_t byteOffset);
void copyIntoBufferUnsynchronized(void* src, size_t size, size_t byteOffset);
/**
* Denotes that this buffer is used for a draw call ensuring that its allocation remains valid
@@ -187,7 +180,7 @@ public:
* is no device allocation.
*
*/
id<MTLBuffer> getGpuBufferForDraw() noexcept;
id<MTLBuffer> getGpuBufferForDraw(id<MTLCommandBuffer> cmdBuffer) noexcept;
void* getCpuBuffer() const noexcept { return mCpuBuffer; }
@@ -216,10 +209,8 @@ private:
BUMP_ALLOCATOR,
};
void uploadWithPoolBuffer(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) const;
void uploadWithBumpAllocator(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) const;
void uploadWithPoolBuffer(void* src, size_t size, size_t byteOffset) const;
void uploadWithBumpAllocator(void* src, size_t size, size_t byteOffset) const;
UploadStrategy mUploadStrategy;
TrackedMetalBuffer mBuffer;

View File

@@ -40,15 +40,12 @@ MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType,
// If the buffer is less than 4K in size and is updated frequently, we don't use an explicit
// buffer. Instead, we use immediate command encoder methods like setVertexBytes:length:atIndex:.
// This won't work for SSBOs, since they are read/write.
/*
if (size <= 4 * 1024 && bindingType != BufferObjectBinding::SHADER_STORAGE &&
usage == BufferUsage::DYNAMIC && !forceGpuBuffer) {
mBuffer = nil;
mCpuBuffer = malloc(size);
return;
}
*/
// Otherwise, we allocate a private GPU buffer.
{
@@ -56,8 +53,8 @@ MetalBuffer::MetalBuffer(MetalContext& context, BufferObjectBinding bindingType,
mBuffer = { [context.device newBufferWithLength:size options:MTLResourceStorageModePrivate],
TrackedMetalBuffer::Type::GENERIC };
}
// mBuffer might fail to be allocated. Clients can check for this by calling
// wasAllocationSuccessful().
FILAMENT_CHECK_POSTCONDITION(mBuffer)
<< "Could not allocate Metal buffer of size " << size << ".";
}
MetalBuffer::~MetalBuffer() {
@@ -66,20 +63,15 @@ MetalBuffer::~MetalBuffer() {
}
}
void MetalBuffer::copyIntoBuffer(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) {
void MetalBuffer::copyIntoBuffer(void* src, size_t size, size_t byteOffset) {
if (size <= 0) {
return;
}
FILAMENT_CHECK_PRECONDITION(src)
<< "copyIntoBuffer called with a null src, tag=" << getHandleTag();
FILAMENT_CHECK_PRECONDITION(size + byteOffset <= mBufferSize)
<< "Attempting to copy " << size << " bytes into a buffer of size " << mBufferSize
<< " at offset " << byteOffset << ", tag=" << getHandleTag();
<< " at offset " << byteOffset;
// The copy blit requires that byteOffset be a multiple of 4.
FILAMENT_CHECK_PRECONDITION(!(byteOffset & 0x3))
<< "byteOffset must be a multiple of 4, tag=" << getHandleTag();
FILAMENT_CHECK_PRECONDITION(!(byteOffset & 0x3)) << "byteOffset must be a multiple of 4";
// If we have a cpu buffer, we can directly copy into it.
if (mCpuBuffer) {
@@ -89,21 +81,20 @@ void MetalBuffer::copyIntoBuffer(
switch (mUploadStrategy) {
case UploadStrategy::BUMP_ALLOCATOR:
uploadWithBumpAllocator(src, size, byteOffset, std::move(getHandleTag));
uploadWithBumpAllocator(src, size, byteOffset);
break;
case UploadStrategy::POOL:
uploadWithPoolBuffer(src, size, byteOffset, std::move(getHandleTag));
uploadWithPoolBuffer(src, size, byteOffset);
break;
}
}
void MetalBuffer::copyIntoBufferUnsynchronized(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) {
void MetalBuffer::copyIntoBufferUnsynchronized(void* src, size_t size, size_t byteOffset) {
// TODO: implement the unsynchronized version
copyIntoBuffer(src, size, byteOffset, std::move(getHandleTag));
copyIntoBuffer(src, size, byteOffset);
}
id<MTLBuffer> MetalBuffer::getGpuBufferForDraw() noexcept {
id<MTLBuffer> MetalBuffer::getGpuBufferForDraw(id<MTLCommandBuffer> cmdBuffer) noexcept {
// If there's a CPU buffer, then we return nil here, as the CPU-side buffer will be bound
// separately.
if (mCpuBuffer) {
@@ -146,7 +137,7 @@ void MetalBuffer::bindBuffers(id<MTLCommandBuffer> cmdBuffer, id<MTLCommandEncod
}
// getGpuBufferForDraw() might return nil, which means there isn't a device allocation for
// this buffer. In this case, we'll bind the buffer below with the CPU-side memory.
id<MTLBuffer> gpuBuffer = buffer->getGpuBufferForDraw();
id<MTLBuffer> gpuBuffer = buffer->getGpuBufferForDraw(cmdBuffer);
if (!gpuBuffer) {
continue;
}
@@ -206,13 +197,9 @@ void MetalBuffer::bindBuffers(id<MTLCommandBuffer> cmdBuffer, id<MTLCommandEncod
}
}
void MetalBuffer::uploadWithPoolBuffer(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) const {
void MetalBuffer::uploadWithPoolBuffer(void* src, size_t size, size_t byteOffset) const {
MetalBufferPool* bufferPool = mContext.bufferPool;
const MetalBufferPoolEntry* const staging = bufferPool->acquireBuffer(size);
FILAMENT_CHECK_POSTCONDITION(staging)
<< "uploadWithPoolbuffer unable to acquire staging buffer of size " << size
<< ", tag=" << getHandleTag();
memcpy(staging->buffer.get().contents, src, size);
// Encode a blit from the staging buffer into the private GPU buffer.
@@ -230,18 +217,10 @@ void MetalBuffer::uploadWithPoolBuffer(
}];
}
void MetalBuffer::uploadWithBumpAllocator(
void* src, size_t size, size_t byteOffset, TagResolver&& getHandleTag) const {
void MetalBuffer::uploadWithBumpAllocator(void* src, size_t size, size_t byteOffset) const {
MetalBumpAllocator& allocator = *mContext.bumpAllocator;
auto [buffer, offset] = allocator.allocateStagingArea(size);
FILAMENT_CHECK_POSTCONDITION(buffer)
<< "uploadWithBumpAllocator unable to acquire staging area of size " << size
<< ", tag=" << getHandleTag();
void* const contents = buffer.contents;
FILAMENT_CHECK_POSTCONDITION(contents)
<< "uploadWithBumpAllocator unable to acquire pointer to staging area, size " << size
<< ", tag=" << getHandleTag();
memcpy(static_cast<char*>(contents) + offset, src, size);
memcpy(static_cast<char*>(buffer.contents) + offset, src, size);
// Encode a blit from the staging buffer into the private GPU buffer.
id<MTLCommandBuffer> cmdBuffer = getPendingCommandBuffer(&mContext);

View File

@@ -21,8 +21,6 @@
#include "MetalShaderCompiler.h"
#include "MetalState.h"
#include <backend/DriverEnums.h>
#include <CoreVideo/CVMetalTextureCache.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
@@ -48,13 +46,13 @@ class MetalBlitter;
class MetalBufferPool;
class MetalBumpAllocator;
class MetalRenderTarget;
class MetalSamplerGroup;
class MetalSwapChain;
class MetalTexture;
class MetalTimerQueryInterface;
struct MetalUniformBuffer;
struct MetalIndexBuffer;
struct MetalVertexBuffer;
struct MetalDescriptorSet;
constexpr static uint8_t MAX_SAMPLE_COUNT = 8; // Metal devices support at most 8 MSAA samples
@@ -70,64 +68,16 @@ private:
bool mDirty = false;
};
/**
 * Tracks the dynamic offsets of every descriptor set in one flat array.
 *
 * mCounts[s] is the number of offsets currently stored for set s. mOffsets holds the offsets of
 * all sets back to back, in set order, so the offsets for set s begin at the sum of the counts of
 * the sets before it.
 */
class MetalDynamicOffsets {
public:
    /**
     * Replaces the offsets stored for a descriptor set.
     *
     * @param set     index of the descriptor set; must be less than MAX_DESCRIPTOR_SET_COUNT
     * @param offsets pointer to count offsets to store for the set
     * @param count   number of offsets; the storage for the set is grown or shrunk to match
     *
     * Marks the object dirty when the number of offsets changed or any offset value differs from
     * what was previously stored.
     */
    void setOffsets(uint32_t set, const uint32_t* offsets, uint32_t count) {
        assert(set < MAX_DESCRIPTOR_SET_COUNT);

        // The start of this set's range only depends on the counts of the sets before it, so it
        // is not affected by resizing this set and can be computed once.
        uint32_t startIndex = 0;
        for (uint32_t i = 0; i < set; i++) {
            startIndex += mCounts[i];
        }

        const uint32_t previousCount = mCounts[set];
        const bool resizeNecessary = previousCount != count;
        if (UTILS_UNLIKELY(resizeNecessary)) {
            // Grow or shrink at the front of this set's range. Every slot of the range is
            // overwritten below, so it does not matter which elements are added or dropped.
            // The two branches keep the arithmetic unsigned instead of going through a signed
            // difference.
            const auto thisSetStart = mOffsets.begin() + startIndex;
            if (count > previousCount) {
                mOffsets.insert(thisSetStart, static_cast<size_t>(count - previousCount),
                        uint32_t{ 0 });
            } else {
                mOffsets.erase(thisSetStart, thisSetStart + (previousCount - count));
            }
            mCounts[set] = count;
        }

        // insert/erase may have invalidated iterators, so derive the destination afresh.
        const auto destination = mOffsets.begin() + startIndex;
        if (resizeNecessary || !std::equal(offsets, offsets + count, destination)) {
            std::copy(offsets, offsets + count, destination);
            mDirty = true;
        }
    }

    /** Returns true if the stored offsets changed since the dirty flag was last cleared. */
    bool isDirty() const { return mDirty; }

    /** Sets or clears the dirty flag. */
    void setDirty(bool dirty) { mDirty = dirty; }

    /** Returns the total number of stored offsets (across all sets) and a pointer to them. */
    std::pair<uint32_t, const uint32_t*> getOffsets() const {
        return { mOffsets.size(), mOffsets.data() };
    }

private:
    // Number of offsets stored for each descriptor set.
    std::array<uint32_t, MAX_DESCRIPTOR_SET_COUNT> mCounts = { 0 };
    // Offsets of all sets, concatenated in set order.
    std::vector<uint32_t> mOffsets;
    bool mDirty = false;
};
struct MetalContext {
explicit MetalContext(size_t metalFreedTextureListSize)
: texturesToDestroy(metalFreedTextureListSize) {}
MetalDriver* driver;
id<MTLDevice> device = nullptr;
id<MTLCommandQueue> commandQueue = nullptr;
// The ID of pendingCommandBuffer (or the next command buffer, if pendingCommandBuffer is nil).
uint64_t pendingCommandBufferId = 1;
// read from driver thread, set from completion handlers
std::atomic<uint64_t> latestCompletedCommandBufferId = 0;
id<MTLCommandBuffer> pendingCommandBuffer = nil;
id<MTLRenderCommandEncoder> currentRenderPassEncoder = nil;
id<MTLCommandBuffer> pendingCommandBuffer = nullptr;
id<MTLRenderCommandEncoder> currentRenderPassEncoder = nullptr;
std::atomic<bool> memorylessLimitsReached = false;
@@ -158,10 +108,11 @@ struct MetalContext {
// State trackers.
PipelineStateTracker pipelineState;
DepthStencilStateTracker depthStencilState;
std::array<BufferState, Program::UNIFORM_BINDING_COUNT> uniformState;
std::array<BufferState, MAX_SSBO_COUNT> ssboState;
CullModeStateTracker cullModeState;
WindingStateTracker windingState;
DepthClampStateTracker depthClampState;
ScissorRectStateTracker scissorRectState;
Handle<HwRenderPrimitive> currentRenderPrimitive;
// State caches.
@@ -174,17 +125,23 @@ struct MetalContext {
std::array<MetalPushConstantBuffer, Program::SHADER_TYPE_COUNT> currentPushConstants;
// Keeps track of descriptor sets we've finalized for the current render pass.
tsl::robin_set<MetalDescriptorSet*> finalizedDescriptorSets;
std::array<MetalDescriptorSet*, MAX_DESCRIPTOR_SET_COUNT> currentDescriptorSets = {};
MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::VERTEX> vertexDescriptorBindings;
MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::FRAGMENT> fragmentDescriptorBindings;
MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::COMPUTE> computeDescriptorBindings;
MetalDynamicOffsets dynamicOffsets;
MetalSamplerGroup* samplerBindings[Program::SAMPLER_BINDING_COUNT] = {};
// Keeps track of all alive textures.
// Keeps track of sampler groups we've finalized for the current render pass.
tsl::robin_set<MetalSamplerGroup*> finalizedSamplerGroups;
// Keeps track of all alive sampler groups, textures.
tsl::robin_set<MetalSamplerGroup*> samplerGroups;
tsl::robin_set<MetalTexture*> textures;
// This circular buffer implements delayed destruction for Metal texture handles. It keeps a
// handle to a fixed number of the most recently destroyed texture handles. When we're asked to
// destroy a texture handle, we free its texture memory, but keep the MetalTexture object alive,
// marking it as "terminated". If we later are asked to use that texture, we can check its
// terminated status and throw an Objective-C error instead of crashing, which is helpful for
// debugging use-after-free issues in release builds.
utils::FixedCircularBuffer<Handle<HwTexture>> texturesToDestroy;
MetalBufferPool* bufferPool;
MetalBumpAllocator* bumpAllocator;
@@ -197,7 +154,6 @@ struct MetalContext {
// Empty texture used to prevent GPU errors when a sampler has been bound without a texture.
id<MTLTexture> emptyTexture = nil;
id<MTLBuffer> emptyBuffer = nil;
MetalBlitter* blitter = nullptr;

View File

@@ -101,14 +101,9 @@ id<MTLCommandBuffer> getPendingCommandBuffer(MetalContext* context) {
context->pendingCommandBuffer = [context->commandQueue commandBuffer];
// It's safe for this block to capture the context variable. MetalDriver::terminate will ensure
// all frames and their completion handlers finish before context is deallocated.
uint64_t thisCommandBufferId = context->pendingCommandBufferId;
[context->pendingCommandBuffer addCompletedHandler:^(id <MTLCommandBuffer> buffer) {
context->resourceTracker.clearResources((__bridge void*) buffer);
// Command buffers should complete in order, so latestCompletedCommandBufferId will only
// ever increase.
context->latestCompletedCommandBufferId = thisCommandBufferId;
auto errorCode = (MTLCommandBufferError)buffer.error.code;
if (@available(macOS 11.0, *)) {
if (errorCode == MTLCommandBufferErrorMemoryless) {
@@ -130,7 +125,6 @@ void submitPendingCommands(MetalContext* context) {
assert_invariant(context->pendingCommandBuffer.status != MTLCommandBufferStatusCommitted);
[context->pendingCommandBuffer commit];
context->pendingCommandBuffer = nil;
context->pendingCommandBufferId++;
}
id<MTLTexture> getOrCreateEmptyTexture(MetalContext* context) {
@@ -173,6 +167,7 @@ void MetalPushConstantBuffer::setPushConstant(PushConstantVariant value, uint8_t
void MetalPushConstantBuffer::setBytes(id<MTLCommandEncoder> encoder, ShaderStage stage) {
constexpr size_t PUSH_CONSTANT_SIZE_BYTES = 4;
constexpr size_t PUSH_CONSTANT_BUFFER_INDEX = 26;
static char buffer[MAX_PUSH_CONSTANT_COUNT * PUSH_CONSTANT_SIZE_BYTES];
assert_invariant(mPushConstants.size() <= MAX_PUSH_CONSTANT_COUNT);

View File

@@ -32,7 +32,6 @@
#include <functional>
#include <mutex>
#include <vector>
#include <deque>
namespace filament {
namespace backend {
@@ -58,11 +57,11 @@ class MetalDriver final : public DriverBase {
public:
static Driver* create(MetalPlatform* platform, const Platform::DriverConfig& driverConfig);
void runAtNextTick(const std::function<void()>& fn) noexcept;
private:
friend class MetalSwapChain;
friend struct MetalDescriptorSet;
MetalPlatform& mPlatform;
MetalContext* mContext;
@@ -74,23 +73,10 @@ private:
/*
* Tasks run regularly on the driver thread.
* Not thread-safe; tasks are run from the driver thread and must be enqueued from the driver
* thread.
*/
void runAtNextTick(const std::function<void()>& fn) noexcept;
void executeTickOps() noexcept;
std::vector<std::function<void()>> mTickOps;
// Tasks regularly executed on the driver thread after a command buffer has completed
struct DeferredTask {
DeferredTask(uint64_t commandBufferId, utils::Invocable<void()>&& fn) noexcept
: commandBufferId(commandBufferId), fn(std::move(fn)) {}
uint64_t commandBufferId; // after this command buffer completes
utils::Invocable<void()> fn; // execute this task
};
void executeAfterCurrentCommandBufferCompletes(utils::Invocable<void()>&& fn) noexcept;
void executeDeferredOps() noexcept;
std::deque<DeferredTask> mDeferredTasks;
std::mutex mTickOpsLock;
/*
* Driver interface
@@ -151,6 +137,7 @@ private:
inline void setRenderPrimitiveBuffer(Handle<HwRenderPrimitive> rph, PrimitiveType pt,
Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh);
void finalizeSamplerGroup(MetalSamplerGroup* sg);
void enumerateBoundBuffers(BufferObjectBinding bindingType,
const std::function<void(const BufferState&, MetalBuffer*, uint32_t)>& f);

File diff suppressed because it is too large Load Diff

View File

@@ -71,8 +71,7 @@ constexpr inline MTLIndexType getIndexType(size_t elementSize) noexcept {
} else if (elementSize == 4) {
return MTLIndexTypeUInt32;
}
assert_invariant(false);
return MTLIndexTypeUInt16;
FILAMENT_CHECK_POSTCONDITION(false) << "Index element size not supported.";
}
constexpr inline MTLVertexFormat getMetalFormat(ElementType type, bool normalized) noexcept {

View File

@@ -32,75 +32,100 @@ struct MetalContext;
* texture.
*/
class MetalExternalImage {
public:
MetalExternalImage() = default;
MetalExternalImage(MetalExternalImage&&);
MetalExternalImage& operator=(MetalExternalImage&&);
~MetalExternalImage() noexcept;
MetalExternalImage(const MetalExternalImage&) = delete;
MetalExternalImage& operator=(const MetalExternalImage&) = delete;
MetalExternalImage(MetalContext& context,
TextureSwizzle r = TextureSwizzle::CHANNEL_0,
TextureSwizzle g = TextureSwizzle::CHANNEL_1,
TextureSwizzle b = TextureSwizzle::CHANNEL_2,
TextureSwizzle a = TextureSwizzle::CHANNEL_3) noexcept;
/**
* While the texture is used for rendering, this MetalExternalImage must be kept alive.
* @return true, if this MetalExternalImage is holding a live external image. Returns false
* until set has been called with a valid CVPixelBuffer. The image can be cleared via
* set(nullptr), and isValid will return false again.
*/
id<MTLTexture> getMtlTexture() const noexcept;
bool isValid() const noexcept {
return mImage != nil || mRgbTexture != nullptr;
}
NSUInteger getWidth() const noexcept;
NSUInteger getHeight() const noexcept;
bool isValid() const noexcept;
/**
* Create an external image with the passed-in CVPixelBuffer.
* Set this external image to the passed-in CVPixelBuffer. Future calls to
* getMetalTextureForDraw will return a texture backed by this CVPixelBuffer. Previous
* CVPixelBuffers and related resources will be released when all GPU work using them has
* finished.
*
* Ownership is taken of the CVPixelBuffer, which will be released when the returned
* MetalExternalImage is destroyed (or, in the case of a YCbCr image, after the conversion has
* completed).
*
* Calling set with a YCbCr image will encode a compute pass to convert the image from
* YCbCr to RGB.
* Calling set with a YCbCr image will encode a compute pass to convert the image from YCbCr to
* RGB.
*/
static MetalExternalImage createFromImage(MetalContext& context, CVPixelBufferRef image);
void set(CVPixelBufferRef image) noexcept;
/**
* Create an external image with a specific plane of the passed-in CVPixelBuffer.
*
* Ownership is taken of the CVPixelBuffer, which will be released when the returned
* MetalExternalImage is destroyed.
* Set this external image to a specific plane of the passed-in CVPixelBuffer. Future calls to
* getMetalTextureForDraw will return a texture backed by a single plane of this CVPixelBuffer.
* Previous CVPixelBuffers and related resources will be released when all GPU work using them
* has finished.
*/
static MetalExternalImage createFromImagePlane(
MetalContext& context, CVPixelBufferRef image, uint32_t plane);
void set(CVPixelBufferRef image, size_t plane) noexcept;
static void assertWritableImage(CVPixelBufferRef image);
/**
* Returns the width of the external image, or 0 if one is not set. For YCbCr images, returns
* the width of the luminance plane.
*/
size_t getWidth() const noexcept { return mWidth; }
/**
* Returns the height of the external image, or 0 if one is not set. For YCbCr images, returns
* the height of the luminance plane.
*/
size_t getHeight() const noexcept { return mHeight; }
/**
* Get a Metal texture used to draw this image and denote that it is used for the current frame.
* For future frames that use this external image, getMetalTextureForDraw must be called again.
*/
id<MTLTexture> getMetalTextureForDraw() const noexcept;
/**
* Free resources. Should be called at least once when no further calls to set will occur.
*/
static void shutdown(MetalContext& context) noexcept;
private:
MetalExternalImage(CVPixelBufferRef image, CVMetalTextureRef texture) noexcept
: mImage(image), mTexture(texture) {}
explicit MetalExternalImage(id<MTLTexture> texture) noexcept : mRgbTexture(texture) {}
static void assertWritableImage(CVPixelBufferRef image);
static id<MTLTexture> createRgbTexture(id<MTLDevice> device, size_t width, size_t height);
static CVMetalTextureRef createTextureFromImage(CVMetalTextureCacheRef textureCache,
CVPixelBufferRef image, MTLPixelFormat format, size_t plane);
static void ensureComputePipelineState(MetalContext& context);
static id<MTLCommandBuffer> encodeColorConversionPass(MetalContext& context,
id<MTLTexture> inYPlane, id<MTLTexture> inCbCrTexture, id<MTLTexture> outTexture);
private:
void unset();
CVMetalTextureRef createTextureFromImage(CVPixelBufferRef image, MTLPixelFormat format,
size_t plane);
id<MTLTexture> createRgbTexture(size_t width, size_t height);
id<MTLTexture> createSwizzledTextureView(id<MTLTexture> texture) const;
id<MTLTexture> createSwizzledTextureView(CVMetalTextureRef texture) const;
void ensureComputePipelineState();
id<MTLCommandBuffer> encodeColorConversionPass(id<MTLTexture> inYPlane, id<MTLTexture>
inCbCrTexture, id<MTLTexture> outTexture);
static constexpr size_t Y_PLANE = 0;
static constexpr size_t CBCR_PLANE = 1;
// TODO: this could probably be a union.
MetalContext& mContext;
// If the external image has a single plane, mImage and mTexture hold references to the image
// and created Metal texture, respectively.
// mTextureView is a view of mTexture with any swizzling applied.
CVPixelBufferRef mImage = nullptr;
CVMetalTextureRef mTexture = nullptr;
id<MTLTexture> mTextureView = nullptr;
size_t mWidth = 0;
size_t mHeight = 0;
// If the external image is in the YCbCr format, this holds the result of the converted RGB
// texture.
id<MTLTexture> mRgbTexture = nil;
struct {
TextureSwizzle r, g, b, a;
} mSwizzle;
};
} // namespace backend

View File

@@ -34,6 +34,10 @@
namespace filament {
namespace backend {
// Deleter callback that releases a Core Video buffer handed over as an untyped pointer.
static const auto cvBufferDeleter = [](const void* buffer) {
    // The callback receives a const void*, but CVBufferRelease takes a mutable buffer reference.
    void* const untyped = const_cast<void*>(buffer);
    CVBufferRelease(static_cast<CVMetalTextureRef>(untyped));
};
static const char* kernel = R"(
#include <metal_stdlib>
#include <simd/simd.h>
@@ -67,30 +71,18 @@ ycbcrToRgb(texture2d<half, access::read> inYTexture [[texture(0)]],
}
)";
NSUInteger MetalExternalImage::getWidth() const noexcept {
if (mImage) {
return CVPixelBufferGetWidth(mImage);
}
if (mRgbTexture) {
return mRgbTexture.width;
}
return 0;
MetalExternalImage::MetalExternalImage(MetalContext& context, TextureSwizzle r, TextureSwizzle g,
TextureSwizzle b, TextureSwizzle a) noexcept : mContext(context), mSwizzle{r, g, b, a} { }
bool MetalExternalImage::isValid() const noexcept {
return mRgbTexture != nil || mImage != nullptr;
}
NSUInteger MetalExternalImage::getHeight() const noexcept {
if (mImage) {
return CVPixelBufferGetHeight(mImage);
}
if (mRgbTexture) {
return mRgbTexture.height;
}
return 0;
}
void MetalExternalImage::set(CVPixelBufferRef image) noexcept {
unset();
MetalExternalImage MetalExternalImage::createFromImage(
MetalContext& context, CVPixelBufferRef image) {
if (!image) {
return {};
return;
}
OSType formatType = CVPixelBufferGetPixelFormatType(image);
@@ -104,29 +96,30 @@ MetalExternalImage MetalExternalImage::createFromImage(
<< ".";
if (planeCount == 0) {
CVMetalTextureRef texture =
createTextureFromImage(context.textureCache, image, MTLPixelFormatBGRA8Unorm, 0);
return { CVPixelBufferRetain(image), texture };
mImage = image;
mTexture = createTextureFromImage(image, MTLPixelFormatBGRA8Unorm, 0);
mTextureView = createSwizzledTextureView(mTexture);
mWidth = CVPixelBufferGetWidth(image);
mHeight = CVPixelBufferGetHeight(image);
}
if (planeCount == 2) {
CVPixelBufferRetain(image);
CVMetalTextureRef yPlane =
createTextureFromImage(context.textureCache, image, MTLPixelFormatR8Unorm, Y_PLANE);
CVMetalTextureRef cbcrPlane =
createTextureFromImage(context.textureCache, image, MTLPixelFormatRG8Unorm, CBCR_PLANE);
CVMetalTextureRef yPlane = createTextureFromImage(image, MTLPixelFormatR8Unorm, Y_PLANE);
CVMetalTextureRef cbcrPlane = createTextureFromImage(image, MTLPixelFormatRG8Unorm,
CBCR_PLANE);
// Get the size of luminance plane.
NSUInteger width = CVPixelBufferGetWidthOfPlane(image, Y_PLANE);
NSUInteger height = CVPixelBufferGetHeightOfPlane(image, Y_PLANE);
mWidth = CVPixelBufferGetWidthOfPlane(image, Y_PLANE);
mHeight = CVPixelBufferGetHeightOfPlane(image, Y_PLANE);
id<MTLTexture> rgbTexture = createRgbTexture(context.device, width, height);
id<MTLCommandBuffer> commandBuffer = encodeColorConversionPass(context,
id<MTLTexture> rgbTexture = createRgbTexture(mWidth, mHeight);
id<MTLCommandBuffer> commandBuffer = encodeColorConversionPass(
CVMetalTextureGetTexture(yPlane),
CVMetalTextureGetTexture(cbcrPlane),
rgbTexture);
mRgbTexture = createSwizzledTextureView(rgbTexture);
[commandBuffer addCompletedHandler:^(id <MTLCommandBuffer> o) {
CVBufferRelease(yPlane);
CVBufferRelease(cbcrPlane);
@@ -134,83 +127,70 @@ MetalExternalImage MetalExternalImage::createFromImage(
}];
[commandBuffer commit];
return MetalExternalImage { rgbTexture };
}
return {};
}
MetalExternalImage MetalExternalImage::createFromImagePlane(
MetalContext& context, CVPixelBufferRef image, uint32_t plane) {
void MetalExternalImage::set(CVPixelBufferRef image, size_t plane) noexcept {
unset();
if (!image) {
return {};
return;
}
const OSType formatType = CVPixelBufferGetPixelFormatType(image);
FILAMENT_CHECK_POSTCONDITION(formatType == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)
<< "Metal planar external images must be in the 420f format.";
FILAMENT_CHECK_POSTCONDITION(plane == 0 || plane == 1)
<< "Metal planar external images must be created from planes 0 or 1.";
auto getPlaneFormat = [](size_t plane) {
// Right now Metal only supports kCVPixelFormatType_420YpCbCr8BiPlanarFullRange planar
// external images, so we can make the following assumptions about the format of each plane.
if (plane == 0) {
return MTLPixelFormatR8Unorm; // luminance
}
if (plane == 1) {
return MTLPixelFormatRG8Unorm; // CbCr
}
return MTLPixelFormatInvalid;
mImage = image;
auto getPlaneFormat = [] (size_t plane) {
// Right now Metal only supports kCVPixelFormatType_420YpCbCr8BiPlanarFullRange planar
// external images, so we can make the following assumptions about the format of each plane.
if (plane == 0) {
return MTLPixelFormatR8Unorm; // luminance
}
if (plane == 1) {
// CbCr
return MTLPixelFormatRG8Unorm; // CbCr
}
return MTLPixelFormatInvalid;
};
const MTLPixelFormat format = getPlaneFormat(plane);
assert_invariant(format != MTLPixelFormatInvalid);
CVMetalTextureRef mTexture = createTextureFromImage(context.textureCache, image, format, plane);
return { CVPixelBufferRetain(image), mTexture };
mTexture = createTextureFromImage(image, format, plane);
mTextureView = createSwizzledTextureView(mTexture);
}
// Move constructor: takes over the image, texture and RGB texture held by rhs and leaves rhs
// holding nothing.
MetalExternalImage::MetalExternalImage(MetalExternalImage&& rhs)
        : mImage(rhs.mImage), mTexture(rhs.mTexture), mRgbTexture(rhs.mRgbTexture) {
    rhs.mImage = nullptr;
    rhs.mTexture = nullptr;
    rhs.mRgbTexture = nullptr;
}
// Move assignment: drops whatever this object currently holds, then takes over the image, texture
// and RGB texture held by rhs, leaving rhs holding nothing.
MetalExternalImage& MetalExternalImage::operator=(MetalExternalImage&& rhs) {
// Release the Core Video objects currently referenced by this instance.
CVPixelBufferRelease(mImage);
CVBufferRelease(mTexture);
// Clear our members first so that the swaps below leave rhs with null values.
mImage = nullptr;
mTexture = nullptr;
mRgbTexture = nullptr;
// NOTE(review): if rhs is *this, the releases above have already run and the object simply ends
// up empty.
std::swap(mImage, rhs.mImage);
std::swap(mTexture, rhs.mTexture);
std::swap(mRgbTexture, rhs.mRgbTexture);
return *this;
}
// Releases the references this object holds on its CVPixelBuffer and CVMetalTexture.
// mRgbTexture is not released explicitly here.
MetalExternalImage::~MetalExternalImage() noexcept {
CVPixelBufferRelease(mImage);
CVBufferRelease(mTexture);
}
id<MTLTexture> MetalExternalImage::getMtlTexture() const noexcept {
id<MTLTexture> MetalExternalImage::getMetalTextureForDraw() const noexcept {
if (mRgbTexture) {
return mRgbTexture;
}
if (mTexture) {
return CVMetalTextureGetTexture(mTexture);
// Retain the image and Metal texture until the GPU has finished with this frame. This does
// not need to be done for the RGB texture, because it is an Objective-C object whose
// lifetime is automatically managed by Metal.
auto& tracker = mContext.resourceTracker;
auto commandBuffer = getPendingCommandBuffer(&mContext);
if (tracker.trackResource((__bridge void*) commandBuffer, mImage, cvBufferDeleter)) {
CVPixelBufferRetain(mImage);
}
return nil;
if (tracker.trackResource((__bridge void*) commandBuffer, mTexture, cvBufferDeleter)) {
CVBufferRetain(mTexture);
}
assert_invariant(mTextureView);
return mTextureView;
}
CVMetalTextureRef MetalExternalImage::createTextureFromImage(CVMetalTextureCacheRef textureCache,
CVPixelBufferRef image, MTLPixelFormat format, size_t plane) {
CVMetalTextureRef MetalExternalImage::createTextureFromImage(CVPixelBufferRef image,
MTLPixelFormat format, size_t plane) {
const size_t width = CVPixelBufferGetWidthOfPlane(image, plane);
const size_t height = CVPixelBufferGetHeightOfPlane(image, plane);
CVMetalTextureRef texture;
CVReturn result = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault, textureCache,
image, nullptr, format, width, height, plane, &texture);
CVReturn result = CVMetalTextureCacheCreateTextureFromImage(kCFAllocatorDefault,
mContext.textureCache, image, nullptr, format, width, height, plane, &texture);
FILAMENT_CHECK_POSTCONDITION(result == kCVReturnSuccess)
<< "Could not create a CVMetalTexture from CVPixelBuffer.";
@@ -221,19 +201,58 @@ void MetalExternalImage::shutdown(MetalContext& context) noexcept {
context.externalImageComputePipelineState = nil;
}
id<MTLTexture> MetalExternalImage::createRgbTexture(
id<MTLDevice> device, size_t width, size_t height) {
// Checks that the given pixel buffer has the 32BGRA pixel format; any other format fails the
// precondition check with the message below.
void MetalExternalImage::assertWritableImage(CVPixelBufferRef image) {
OSType formatType = CVPixelBufferGetPixelFormatType(image);
FILAMENT_CHECK_PRECONDITION(formatType == kCVPixelFormatType_32BGRA)
<< "Metal SwapChain images must be in the 32BGRA format.";
}
// Resets this external image to the empty state: releases the Core Video image and texture,
// drops the Metal texture references and zeroes the cached dimensions.
void MetalExternalImage::unset() {
    CVPixelBufferRelease(mImage);
    mImage = nullptr;

    CVBufferRelease(mTexture);
    mTexture = nullptr;

    mTextureView = nil;
    mRgbTexture = nil;

    mWidth = mHeight = 0;
}
id<MTLTexture> MetalExternalImage::createRgbTexture(size_t width, size_t height) {
MTLTextureDescriptor *descriptor =
[MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA8Unorm
width:width
height:height
mipmapped:NO];
descriptor.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead;
return [device newTextureWithDescriptor:descriptor];
return [mContext.device newTextureWithDescriptor:descriptor];
}
void MetalExternalImage::ensureComputePipelineState(MetalContext& context) {
if (context.externalImageComputePipelineState != nil) {
// Returns a view of the texture with this image's swizzle applied. The texture itself is returned
// when the swizzle is the identity mapping, when the context reports no swizzling support, or
// when the availability check below fails.
id<MTLTexture> MetalExternalImage::createSwizzledTextureView(id<MTLTexture> texture) const {
    const bool usesIdentitySwizzle =
            mSwizzle.r == TextureSwizzle::CHANNEL_0 &&
            mSwizzle.g == TextureSwizzle::CHANNEL_1 &&
            mSwizzle.b == TextureSwizzle::CHANNEL_2 &&
            mSwizzle.a == TextureSwizzle::CHANNEL_3;
    if (usesIdentitySwizzle || !mContext.supportsTextureSwizzling) {
        return texture;
    }
    // supportsTextureSwizzling has already been checked, but the call still has to sit behind
    // @available, otherwise the API usage generates compiler warnings.
    if (@available(iOS 13, *)) {
        return createTextureViewWithSwizzle(texture,
                getSwizzleChannels(mSwizzle.r, mSwizzle.g, mSwizzle.b, mSwizzle.a));
    }
    return texture;
}
// Convenience overload: fetches the Metal texture behind a CVMetalTexture and forwards it to the
// id<MTLTexture> overload.
id<MTLTexture> MetalExternalImage::createSwizzledTextureView(CVMetalTextureRef ref) const {
    return createSwizzledTextureView(CVMetalTextureGetTexture(ref));
}
void MetalExternalImage::ensureComputePipelineState() {
if (mContext.externalImageComputePipelineState != nil) {
return;
}
@@ -241,28 +260,29 @@ void MetalExternalImage::ensureComputePipelineState(MetalContext& context) {
NSString* objcSource = [NSString stringWithCString:kernel
encoding:NSUTF8StringEncoding];
id<MTLLibrary> library = [context.device newLibraryWithSource:objcSource
options:nil
error:&error];
id<MTLLibrary> library = [mContext.device newLibraryWithSource:objcSource
options:nil
error:&error];
NSERROR_CHECK("Unable to compile Metal shading library.");
id<MTLFunction> kernelFunction = [library newFunctionWithName:@"ycbcrToRgb"];
context.externalImageComputePipelineState =
[context.device newComputePipelineStateWithFunction:kernelFunction error:&error];
mContext.externalImageComputePipelineState =
[mContext.device newComputePipelineStateWithFunction:kernelFunction
error:&error];
NSERROR_CHECK("Unable to create Metal compute pipeline state.");
}
id<MTLCommandBuffer> MetalExternalImage::encodeColorConversionPass(MetalContext& context,
id<MTLTexture> inYPlane, id<MTLTexture> inCbCrTexture, id<MTLTexture> outTexture) {
ensureComputePipelineState(context);
id<MTLCommandBuffer> MetalExternalImage::encodeColorConversionPass(id<MTLTexture> inYPlane,
id<MTLTexture> inCbCrTexture, id<MTLTexture> outTexture) {
ensureComputePipelineState();
id<MTLCommandBuffer> commandBuffer = [context.commandQueue commandBuffer];
id<MTLCommandBuffer> commandBuffer = [mContext.commandQueue commandBuffer];
commandBuffer.label = @"YCbCr to RGB conversion";
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
[computeEncoder setComputePipelineState:context.externalImageComputePipelineState];
[computeEncoder setComputePipelineState:mContext.externalImageComputePipelineState];
[computeEncoder setTexture:inYPlane atIndex:0];
[computeEncoder setTexture:inCbCrTexture atIndex:1];
[computeEncoder setTexture:outTexture atIndex:2];
@@ -280,11 +300,5 @@ id<MTLCommandBuffer> MetalExternalImage::encodeColorConversionPass(MetalContext&
return commandBuffer;
}
// Checks that the given pixel buffer has the 32BGRA pixel format; any other format fails the
// precondition check with the message below.
void MetalExternalImage::assertWritableImage(CVPixelBufferRef image) {
OSType formatType = CVPixelBufferGetPixelFormatType(image);
FILAMENT_CHECK_PRECONDITION(formatType == kCVPixelFormatType_32BGRA)
<< "Metal SwapChain images must be in the 32BGRA format.";
}
} // namespace backend
} // namespace filament

View File

@@ -44,7 +44,6 @@
#include <condition_variable>
#include <memory>
#include <type_traits>
#include <vector>
namespace filament {
namespace backend {
@@ -74,8 +73,7 @@ public:
void releaseDrawable();
void setFrameScheduledCallback(
CallbackHandler* handler, FrameScheduledCallback&& callback, uint64_t flags);
void setFrameScheduledCallback(CallbackHandler* handler, FrameScheduledCallback&& callback);
void setFrameCompletedCallback(
CallbackHandler* handler, utils::Invocable<void(void)>&& callback);
@@ -86,8 +84,6 @@ public:
NSUInteger getSurfaceWidth() const;
NSUInteger getSurfaceHeight() const;
bool isPixelBuffer() const { return type == SwapChainType::CVPIXELBUFFERREF; }
private:
enum class SwapChainType {
@@ -97,6 +93,7 @@ private:
};
bool isCaMetalLayer() const { return type == SwapChainType::CAMETALLAYER; }
bool isHeadless() const { return type == SwapChainType::HEADLESS; }
bool isPixelBuffer() const { return type == SwapChainType::CVPIXELBUFFERREF; }
void scheduleFrameScheduledCallback();
void scheduleFrameCompletedCallback();
@@ -124,7 +121,6 @@ private:
struct {
CallbackHandler* handler = nullptr;
std::shared_ptr<FrameScheduledCallback> callback = nullptr;
uint64_t flags = 0;
} frameScheduled;
struct {
@@ -135,17 +131,19 @@ private:
class MetalBufferObject : public HwBufferObject {
public:
using TagResolver = MetalBuffer::TagResolver;
MetalBufferObject(MetalContext& context, BufferObjectBinding bindingType, BufferUsage usage,
uint32_t byteCount);
void updateBuffer(void* data, size_t size, uint32_t byteOffset, TagResolver&& getHandleTag);
void updateBufferUnsynchronized(
void* data, size_t size, uint32_t byteOffset, TagResolver&& getHandleTag);
void updateBuffer(void* data, size_t size, uint32_t byteOffset);
void updateBufferUnsynchronized(void* data, size_t size, uint32_t byteOffset);
MetalBuffer* getBuffer() { return &buffer; }
// Tracks which uniform/ssbo buffers this buffer object is bound into.
static_assert(Program::UNIFORM_BINDING_COUNT <= 32);
static_assert(MAX_SSBO_COUNT <= 32);
utils::bitset32 boundUniformBuffers;
utils::bitset32 boundSsbos;
private:
MetalBuffer buffer;
};
@@ -202,11 +200,12 @@ public:
MetalProgram(MetalContext& context, Program&& program) noexcept;
const MetalShaderCompiler::MetalFunctionBundle& getFunctions();
const MetalShaderCompiler::MetalFunctionBundle& getFunctionsIfPresent() const;
const Program::SamplerGroupInfo& getSamplerGroupInfo() { return samplerGroupInfo; }
private:
void initialize();
Program::SamplerGroupInfo samplerGroupInfo;
MetalContext& mContext;
MetalShaderCompiler::MetalFunctionBundle mFunctionBundle;
MetalShaderCompiler::program_token_t mToken;
@@ -228,42 +227,43 @@ struct PixelBufferShape {
class MetalTexture : public HwTexture {
public:
MetalTexture(MetalContext& context, SamplerType target, uint8_t levels, TextureFormat format,
uint8_t samples, uint32_t width, uint32_t height, uint32_t depth,
TextureUsage usage) noexcept;
// constructors for creating texture views
MetalTexture(MetalContext& context, MetalTexture const* src, uint8_t baseLevel,
uint8_t levelCount) noexcept;
MetalTexture(MetalContext& context, MetalTexture const* src, TextureSwizzle r, TextureSwizzle g,
TextureSwizzle b, TextureSwizzle a) noexcept;
uint8_t samples, uint32_t width, uint32_t height, uint32_t depth, TextureUsage usage,
TextureSwizzle r, TextureSwizzle g, TextureSwizzle b, TextureSwizzle a)
noexcept;
// Constructor for importing an id<MTLTexture> outside of Filament.
MetalTexture(MetalContext& context, SamplerType target, uint8_t levels, TextureFormat format,
uint8_t samples, uint32_t width, uint32_t height, uint32_t depth, TextureUsage usage,
id<MTLTexture> texture) noexcept;
// Constructors for importing external images.
MetalTexture(MetalContext& context, TextureFormat format, uint32_t width, uint32_t height,
TextureUsage usage, CVPixelBufferRef image) noexcept;
MetalTexture(MetalContext& context, TextureFormat format, uint32_t width, uint32_t height,
TextureUsage usage, CVPixelBufferRef image, uint32_t plane) noexcept;
~MetalTexture();
// Returns an id<MTLTexture> suitable for reading in a shader, taking into account swizzle.
id<MTLTexture> getMtlTextureForRead() const noexcept;
// Returns an id<MTLTexture> suitable for reading in a shader, taking into account swizzle and
// LOD clamping.
id<MTLTexture> getMtlTextureForRead() noexcept;
// Returns the id<MTLTexture> for attaching to a render pass.
id<MTLTexture> getMtlTextureForWrite() const noexcept {
id<MTLTexture> getMtlTextureForWrite() noexcept {
return texture;
}
std::shared_ptr<MetalExternalImage> getExternalImage() const noexcept { return externalImage; }
void loadImage(uint32_t level, MTLRegion region, PixelBufferDescriptor& p) noexcept;
void generateMipmaps() noexcept;
// A texture starts out with none of its mip levels (also referred to as LODs) available for
// reading. 4 actions update the range of LODs available:
// - calling loadImage
// - calling generateMipmaps
// - using the texture as a render target attachment
// - calling setMinMaxLevels
// A texture's available mips are consistent throughout a render pass.
void setLodRange(uint16_t minLevel, uint16_t maxLevel);
void extendLodRangeTo(uint16_t level);
static MTLPixelFormat decidePixelFormat(MetalContext* context, TextureFormat format);
MetalContext& context;
MetalExternalImage externalImage;
// A "sidecar" texture used to implement automatic MSAA resolve.
// This is created by MetalRenderTarget and stored here so it can be used with multiple
@@ -272,6 +272,26 @@ public:
MTLPixelFormat devicePixelFormat;
// Frees memory associated with this texture and marks it as "terminated".
// Used to track "use after free" scenario.
void terminate() noexcept;
bool isTerminated() const noexcept { return terminated; }
// Debug guard against sampling a texture that has already been terminated.
// Does nothing while the texture is alive. Once terminate() has been called, raises an
// NSException named "MetalTextureUseAfterFree" whose reason identifies the sampler group (by
// debug name) and the index of the texture within that group.
inline void checkUseAfterFree(const char* samplerGroupDebugName, size_t textureIndex) const {
    const bool alive = !isTerminated();
    if (UTILS_LIKELY(alive)) {
        return;
    }
    NSString* const message = [NSString
            stringWithFormat:@"Filament Metal texture use after free, sampler group = "
                             @"%s, texture index = %zu",
            samplerGroupDebugName, textureIndex];
    [[NSException exceptionWithName:@"MetalTextureUseAfterFree"
                             reason:message
                           userInfo:nil] raise];
}
private:
void loadSlice(uint32_t level, MTLRegion region, uint32_t byteOffset, uint32_t slice,
PixelBufferDescriptor const& data) noexcept;
@@ -282,12 +302,95 @@ private:
id<MTLTexture> texture = nil;
std::shared_ptr<MetalExternalImage> externalImage;
// If non-nil, a swizzled texture view to use instead of "texture".
// Filament swizzling only affects texture reads, so this should not be used when the texture is
// bound as a render target attachment.
id<MTLTexture> swizzledTextureView = nil;
id<MTLTexture> lodTextureView = nil;
uint16_t minLod = std::numeric_limits<uint16_t>::max();
uint16_t maxLod = 0;
bool terminated = false;
};
// A fixed-size group of texture/sampler pairs that is encoded into a Metal argument buffer.
//
// Lifecycle, as implemented by the methods below:
//   reset()   -> installs an argument encoder, obtains argument buffer storage and clears all slots
//   setTextureHandle() / setFinalizedTexture() / setFinalizedSampler() -> fill the slots
//   finalize() -> encodes every slot into the current argument buffer allocation
//   mutate()  -> takes a fresh allocation while keeping the current slots, so they can be updated
class MetalSamplerGroup : public HwSamplerGroup {
public:
    explicit MetalSamplerGroup(size_t size, utils::FixedSizeString<32> name) noexcept
            : size(size),
              debugName(name),
              textureHandles(size, Handle<HwTexture>{}),
              textures(size, nil),
              samplers(size, nil) {}

    // Records the Filament texture handle for slot `index`. Only valid before finalization.
    void setTextureHandle(size_t index, Handle<HwTexture> th) {
        assert_invariant(!finalized);
        textureHandles[index] = th;
    }

    // Debug-only accessor, used to verify that every texture handle is still alive.
    utils::FixedCapacityVector<Handle<HwTexture>> const& getTextureHandles() const {
        return textureHandles;
    }

    // Stores the MTLTexture that will be encoded for slot `index`. Only valid before finalization.
    void setFinalizedTexture(size_t index, id<MTLTexture> t) {
        assert_invariant(!finalized);
        textures[index] = t;
    }

    // Stores the MTLSamplerState that will be encoded for slot `index`. Only valid before
    // finalization.
    void setFinalizedSampler(size_t index, id<MTLSamplerState> s) {
        assert_invariant(!finalized);
        samplers[index] = s;
    }

    // Finalizing a SamplerGroup means all of its textures have been set and it is ready to be used
    // in a draw call. After that, it has to be reset or mutated before it can be written again.
    void finalize();

    bool isFinalized() const noexcept { return finalized; }

    // reset() and mutate() both "unfinalize" the SamplerGroup so that setFinalizedTexture and
    // setFinalizedSampler may be called again.
    //  - reset: every sampler/texture must be rebound afterwards. The MTLArgumentEncoder is passed
    //    in because the texture types may have changed.
    //  - mutate: the current samplers/textures are kept and get re-encoded into the new argument
    //    buffer.
    void reset(id<MTLCommandBuffer> cmdBuffer, id<MTLArgumentEncoder> e, id<MTLDevice> device);
    void mutate(id<MTLCommandBuffer> cmdBuffer);

    // The MTLBuffer backing the current argument buffer allocation. Requires a finalized group.
    id<MTLBuffer> getArgumentBuffer() const {
        assert_invariant(finalized);
        auto const allocation = argBuffer->getCurrentAllocation();
        return allocation.first;
    }

    // Byte offset of the current allocation inside the buffer returned by getArgumentBuffer().
    NSUInteger getArgumentBufferOffset() const {
        auto const allocation = argBuffer->getCurrentAllocation();
        return allocation.second;
    }

    // Returns the (texture handle, MTLTexture) pair stored in slot `index`.
    std::pair<Handle<HwTexture>, id<MTLTexture>> getFinalizedTexture(size_t index) {
        return std::pair<Handle<HwTexture>, id<MTLTexture>>{ textureHandles[index],
                textures[index] };
    }

    // Invokes Metal's useResource(s) API on the given encoder for every texture in this group.
    void useResources(id<MTLRenderCommandEncoder> renderPassEncoder);

    size_t size;
    utils::FixedSizeString<32> debugName;

    // The three vectors below always have the same length and are indexed by slot.
    utils::FixedCapacityVector<Handle<HwTexture>> textureHandles;
    utils::FixedCapacityVector<id<MTLTexture>> textures;
    utils::FixedCapacityVector<id<MTLSamplerState>> samplers;

    // Encoder installed by reset() and used by finalize().
    id<MTLArgumentEncoder> encoder;

    // Ring buffer providing the argument buffer allocations; created lazily by reset().
    std::unique_ptr<MetalRingBuffer> argBuffer{};

    bool finalized{ false };
};
class MetalRenderTarget : public HwRenderTarget {
@@ -438,67 +541,12 @@ struct MetalTimerQuery : public HwTimerQuery {
struct Status {
std::atomic<bool> available {false};
std::atomic<uint64_t> elapsed {0}; // only valid if available is true
uint64_t elapsed {0}; // only valid if available is true
};
std::shared_ptr<Status> status;
};
// Metal-side representation of a descriptor set layout.
// Stores the backend-agnostic DescriptorSetLayout and hands out MTLArgumentEncoders built from
// it, caching one encoder (plus the texture types it was built for) per shader stage.
class MetalDescriptorSetLayout : public HwDescriptorSetLayout {
public:
// Takes ownership of the layout description.
MetalDescriptorSetLayout(DescriptorSetLayout&& layout) noexcept;
// The bindings of the wrapped layout.
const auto& getBindings() const noexcept { return mLayout.bindings; }
// Number of bindings in this layout that use a dynamic offset.
size_t getDynamicOffsetCount() const noexcept { return mDynamicOffsetCount; }
/**
 * Get an argument encoder for this descriptor set and shader stage.
 * textureTypes should only include the textures present in the corresponding shader stage.
 * The encoder is cached per stage and rebuilt when textureTypes differs from the cached types.
 */
id<MTLArgumentEncoder> getArgumentEncoder(id<MTLDevice> device, ShaderStage stage,
utils::FixedCapacityVector<MTLTextureType> const& textureTypes);
private:
// Uncached path: always builds a new argument encoder for the given stage and texture types.
id<MTLArgumentEncoder> getArgumentEncoderSlow(id<MTLDevice> device, ShaderStage stage,
utils::FixedCapacityVector<MTLTextureType> const& textureTypes);
DescriptorSetLayout mLayout;
size_t mDynamicOffsetCount = 0;
// Per-stage cache, indexed by static_cast<size_t>(ShaderStage).
std::array<id<MTLArgumentEncoder>, Program::SHADER_TYPE_COUNT> mCachedArgumentEncoder = { nil };
// Texture types each cached encoder was created with; same indexing as above.
std::array<utils::FixedCapacityVector<MTLTextureType>, Program::SHADER_TYPE_COUNT>
mCachedTextureTypes;
};
// Metal-side representation of a descriptor set: the buffers and textures bound to the bindings
// of a MetalDescriptorSetLayout, plus the per-stage argument buffers they are encoded into.
struct MetalDescriptorSet : public HwDescriptorSet {
MetalDescriptorSet(MetalDescriptorSetLayout* layout) noexcept;
// Declares resource usage on the driver's current render pass encoder: the context's empty
// buffer and empty texture, then everything in vertexResources and fragmentResources.
void finalize(MetalDriver* driver);
// Returns the argument buffer for the given shader stage. The buffer is created and encoded on
// first use and then served from cachedBuffer.
id<MTLBuffer> finalizeAndGetBuffer(MetalDriver* driver, ShaderStage stage);
// Layout this set was created from (raw pointer stored as given to the constructor).
MetalDescriptorSetLayout* layout;
// A buffer bound to a binding, with the offset used when encoding it.
struct BufferBinding {
id<MTLBuffer> buffer;
uint32_t offset;
uint32_t size;
};
// A texture bound to a binding, together with the sampler parameters to sample it with.
struct TextureBinding {
id<MTLTexture> texture;
SamplerParams sampler;
};
// Bound resources, keyed by binding number. Bindings that are missing here are encoded with the
// context's empty buffer / empty texture by finalizeAndGetBuffer().
tsl::robin_map<descriptor_binding_t, BufferBinding> buffers;
tsl::robin_map<descriptor_binding_t, TextureBinding> textures;
// Resources passed to useResources for the vertex and fragment stage respectively in finalize().
std::vector<id<MTLResource>> vertexResources;
std::vector<id<MTLResource>> fragmentResources;
// NOTE(review): presumably keeps bound external images alive for the lifetime of this set;
// confirm against the code that fills this vector.
std::vector<std::shared_ptr<MetalExternalImage>> externalImages;
// Per-stage argument buffer cache, indexed by static_cast<size_t>(ShaderStage).
std::array<TrackedMetalBuffer, Program::SHADER_TYPE_COUNT> cachedBuffer = { nil };
};
} // namespace backend
} // namespace filament

View File

@@ -74,6 +74,7 @@ MetalSwapChain::MetalSwapChain(MetalContext& context, CAMetalLayer* nativeWindow
depthStencilFormat(decideDepthStencilFormat(flags)),
layer(nativeWindow),
layerDrawableMutex(std::make_shared<std::mutex>()),
externalImage(context),
type(SwapChainType::CAMETALLAYER) {
if (!(flags & SwapChain::CONFIG_TRANSPARENT) && !nativeWindow.opaque) {
@@ -99,15 +100,17 @@ MetalSwapChain::MetalSwapChain(MetalContext& context, int32_t width, int32_t hei
depthStencilFormat(decideDepthStencilFormat(flags)),
headlessWidth(width),
headlessHeight(height),
externalImage(context),
type(SwapChainType::HEADLESS) {}
MetalSwapChain::MetalSwapChain(MetalContext& context, CVPixelBufferRef pixelBuffer, uint64_t flags)
: context(context),
depthStencilFormat(decideDepthStencilFormat(flags)),
externalImage(MetalExternalImage::createFromImage(context, pixelBuffer)),
externalImage(context),
type(SwapChainType::CVPIXELBUFFERREF) {
assert_invariant(flags & SWAP_CHAIN_CONFIG_APPLE_CVPIXELBUFFER);
MetalExternalImage::assertWritableImage(pixelBuffer);
externalImage.set(pixelBuffer);
assert_invariant(externalImage.isValid());
}
@@ -118,6 +121,7 @@ MTLPixelFormat MetalSwapChain::decideDepthStencilFormat(uint64_t flags) {
}
// Clears the swap chain's external image by setting it to null before the members are destroyed.
MetalSwapChain::~MetalSwapChain() {
externalImage.set(nullptr);
}
NSUInteger MetalSwapChain::getSurfaceWidth() const {
@@ -167,7 +171,7 @@ id<MTLTexture> MetalSwapChain::acquireDrawable() {
}
if (isPixelBuffer()) {
return externalImage.getMtlTexture();
return externalImage.getMetalTextureForDraw();
}
assert_invariant(isCaMetalLayer());
@@ -229,10 +233,9 @@ void MetalSwapChain::ensureDepthStencilTexture() {
}
void MetalSwapChain::setFrameScheduledCallback(
CallbackHandler* handler, FrameScheduledCallback&& callback, uint64_t flags) {
CallbackHandler* handler, FrameScheduledCallback&& callback) {
frameScheduled.handler = handler;
frameScheduled.callback = std::make_shared<FrameScheduledCallback>(std::move(callback));
frameScheduled.flags = flags;
}
void MetalSwapChain::setFrameCompletedCallback(
@@ -254,6 +257,10 @@ void MetalSwapChain::present() {
}
}
#ifndef FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD
#define FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD 1
#endif
class PresentDrawableData {
public:
PresentDrawableData() = delete;
@@ -261,10 +268,10 @@ public:
PresentDrawableData& operator=(const PresentDrawableData&) = delete;
static PresentDrawableData* create(id<CAMetalDrawable> drawable,
std::shared_ptr<std::mutex> drawableMutex, MetalDriver* driver, uint64_t flags) {
std::shared_ptr<std::mutex> drawableMutex, MetalDriver* driver) {
assert_invariant(drawableMutex);
assert_invariant(driver);
return new PresentDrawableData(drawable, drawableMutex, driver, flags);
return new PresentDrawableData(drawable, drawableMutex, driver);
}
static void maybePresentAndDestroyAsync(PresentDrawableData* that, bool shouldPresent) {
@@ -272,23 +279,20 @@ public:
[that->mDrawable present];
}
if (that->mFlags & SwapChain::CALLBACK_DEFAULT_USE_METAL_COMPLETION_HANDLER) {
cleanupAndDestroy(that);
} else {
// mDrawable is acquired on the driver thread. Typically, we would release this object
// on the same thread, but after receiving consistent crash reports from within
// [CAMetalDrawable dealloc], we suspect this object requires releasing on the main
// thread.
dispatch_async(dispatch_get_main_queue(), ^{
cleanupAndDestroy(that);
});
}
#if FILAMENT_RELEASE_PRESENT_DRAWABLE_MAIN_THREAD == 1
// mDrawable is acquired on the driver thread. Typically, we would release this object on
// the same thread, but after receiving consistent crash reports from within
// [CAMetalDrawable dealloc], we suspect this object requires releasing on the main thread.
dispatch_async(dispatch_get_main_queue(), ^{ cleanupAndDestroy(that); });
#else
that->mDriver->runAtNextTick([that]() { cleanupAndDestroy(that); });
#endif
}
private:
PresentDrawableData(id<CAMetalDrawable> drawable, std::shared_ptr<std::mutex> drawableMutex,
MetalDriver* driver, uint64_t flags)
: mDrawable(drawable), mDrawableMutex(drawableMutex), mDriver(driver), mFlags(flags) {}
MetalDriver* driver)
: mDrawable(drawable), mDrawableMutex(drawableMutex), mDriver(driver) {}
static void cleanupAndDestroy(PresentDrawableData *that) {
if (that->mDrawable) {
@@ -303,7 +307,6 @@ private:
id<CAMetalDrawable> mDrawable;
std::shared_ptr<std::mutex> mDrawableMutex;
MetalDriver* mDriver = nullptr;
uint64_t mFlags = 0;
};
void presentDrawable(bool presentFrame, void* user) {
@@ -320,8 +323,8 @@ void MetalSwapChain::scheduleFrameScheduledCallback() {
struct Callback {
Callback(std::shared_ptr<FrameScheduledCallback> callback, id<CAMetalDrawable> drawable,
std::shared_ptr<std::mutex> drawableMutex, MetalDriver* driver, uint64_t flags)
: f(callback), data(PresentDrawableData::create(drawable, drawableMutex, driver, flags)) {}
std::shared_ptr<std::mutex> drawableMutex, MetalDriver* driver)
: f(callback), data(PresentDrawableData::create(drawable, drawableMutex, driver)) {}
std::shared_ptr<FrameScheduledCallback> f;
// PresentDrawableData* is destroyed by maybePresentAndDestroyAsync() later.
std::unique_ptr<PresentDrawableData> data;
@@ -336,19 +339,14 @@ void MetalSwapChain::scheduleFrameScheduledCallback() {
// This callback pointer will be captured by the block. Even if the scheduled handler is never
// called, the unique_ptr will still ensure we don't leak memory.
uint64_t const flags = frameScheduled.flags;
__block auto callback = std::make_unique<Callback>(
frameScheduled.callback, drawable, layerDrawableMutex, context.driver, flags);
frameScheduled.callback, drawable, layerDrawableMutex, context.driver);
backend::CallbackHandler* handler = frameScheduled.handler;
MetalDriver* driver = context.driver;
[getPendingCommandBuffer(&context) addScheduledHandler:^(id<MTLCommandBuffer> cb) {
Callback* user = callback.release();
if (flags & SwapChain::CALLBACK_DEFAULT_USE_METAL_COMPLETION_HANDLER) {
Callback::func(user);
} else {
driver->scheduleCallback(handler, user, &Callback::func);
}
driver->scheduleCallback(handler, user, &Callback::func);
}];
}
@@ -383,14 +381,12 @@ MetalBufferObject::MetalBufferObject(MetalContext& context, BufferObjectBinding
BufferUsage usage, uint32_t byteCount)
: HwBufferObject(byteCount), buffer(context, bindingType, usage, byteCount) {}
void MetalBufferObject::updateBuffer(
void* data, size_t size, uint32_t byteOffset, TagResolver&& getHandleTag) {
buffer.copyIntoBuffer(data, size, byteOffset, std::move(getHandleTag));
void MetalBufferObject::updateBuffer(void* data, size_t size, uint32_t byteOffset) {
buffer.copyIntoBuffer(data, size, byteOffset);
}
void MetalBufferObject::updateBufferUnsynchronized(
void* data, size_t size, uint32_t byteOffset, TagResolver&& getHandleTag) {
buffer.copyIntoBufferUnsynchronized(data, size, byteOffset, std::move(getHandleTag));
void MetalBufferObject::updateBufferUnsynchronized(void* data, size_t size, uint32_t byteOffset) {
buffer.copyIntoBufferUnsynchronized(data, size, byteOffset);
}
MetalVertexBufferInfo::MetalVertexBufferInfo(MetalContext& context, uint8_t bufferCount,
@@ -486,6 +482,11 @@ void MetalRenderPrimitive::setBuffers(MetalVertexBufferInfo const* const vbi,
MetalProgram::MetalProgram(MetalContext& context, Program&& program) noexcept
: HwProgram(program.getName()), mContext(context) {
// Save this program's SamplerGroupInfo, it's used during draw calls to bind sampler groups to
// the appropriate stage(s).
samplerGroupInfo = program.getSamplerGroupInfo();
mToken = context.shaderCompiler->createProgram(program.getName(), std::move(program));
assert_invariant(mToken);
}
@@ -495,10 +496,6 @@ const MetalShaderCompiler::MetalFunctionBundle& MetalProgram::getFunctions() {
return mFunctionBundle;
}
const MetalShaderCompiler::MetalFunctionBundle& MetalProgram::getFunctionsIfPresent() const {
return mFunctionBundle;
}
void MetalProgram::initialize() {
if (!mToken) {
return;
@@ -509,9 +506,10 @@ void MetalProgram::initialize() {
MetalTexture::MetalTexture(MetalContext& context, SamplerType target, uint8_t levels,
TextureFormat format, uint8_t samples, uint32_t width, uint32_t height, uint32_t depth,
TextureUsage usage) noexcept
: HwTexture(target, levels, samples, width, height, depth, format, usage), context(context) {
assert_invariant(target != SamplerType::SAMPLER_EXTERNAL);
TextureUsage usage, TextureSwizzle r, TextureSwizzle g, TextureSwizzle b,
TextureSwizzle a) noexcept
: HwTexture(target, levels, samples, width, height, depth, format, usage), context(context),
externalImage(context, r, g, b, a) {
devicePixelFormat = decidePixelFormat(&context, format);
FILAMENT_CHECK_POSTCONDITION(devicePixelFormat != MTLPixelFormatInvalid)
@@ -597,28 +595,16 @@ MetalTexture::MetalTexture(MetalContext& context, SamplerType target, uint8_t le
<< ", levels = " << int(levels) << ", MTLPixelFormat = " << int(devicePixelFormat)
<< ", width = " << width << ", height = " << height << ", depth = " << depth
<< "). Out of memory?";
}
MetalTexture::MetalTexture(MetalContext& context, MetalTexture const* src, uint8_t baseLevel,
uint8_t levelCount) noexcept
: HwTexture(src->target, src->levels, src->samples, src->width, src->height, src->depth,
src->format, src->usage),
context(context),
devicePixelFormat(src->devicePixelFormat),
externalImage(src->externalImage) {
texture = createTextureViewWithLodRange(
src->getMtlTextureForRead(), baseLevel, baseLevel + levelCount - 1);
}
MetalTexture::MetalTexture(MetalContext& context, MetalTexture const* src, TextureSwizzle r,
TextureSwizzle g, TextureSwizzle b, TextureSwizzle a) noexcept
: HwTexture(src->target, src->levels, src->samples, src->width, src->height, src->depth,
src->format, src->usage),
context(context),
devicePixelFormat(src->devicePixelFormat),
externalImage(src->externalImage) {
texture = src->getMtlTextureForRead();
if (context.supportsTextureSwizzling) {
// If swizzling is set, set up a swizzled texture view that we'll use when sampling this texture.
const bool isDefaultSwizzle =
r == TextureSwizzle::CHANNEL_0 &&
g == TextureSwizzle::CHANNEL_1 &&
b == TextureSwizzle::CHANNEL_2 &&
a == TextureSwizzle::CHANNEL_3;
// If texture is nil, then it must be a SAMPLER_EXTERNAL texture.
// Swizzling for external textures is handled inside MetalExternalImage.
if (!isDefaultSwizzle && texture && context.supportsTextureSwizzling) {
// Even though we've already checked context.supportsTextureSwizzling, we still need to
// guard these calls with @availability, otherwise the API usage will generate compiler
// warnings.
@@ -632,30 +618,44 @@ MetalTexture::MetalTexture(MetalContext& context, MetalTexture const* src, Textu
MetalTexture::MetalTexture(MetalContext& context, SamplerType target, uint8_t levels, TextureFormat format,
uint8_t samples, uint32_t width, uint32_t height, uint32_t depth, TextureUsage usage,
id<MTLTexture> metalTexture) noexcept
: HwTexture(target, levels, samples, width, height, depth, format, usage), context(context) {
: HwTexture(target, levels, samples, width, height, depth, format, usage), context(context),
externalImage(context) {
texture = metalTexture;
setLodRange(0, levels - 1);
}
MetalTexture::MetalTexture(MetalContext& context, TextureFormat format, uint32_t width,
uint32_t height, TextureUsage usage, CVPixelBufferRef image) noexcept
: HwTexture(SamplerType::SAMPLER_EXTERNAL, 1, 1, width, height, 1, format, usage),
context(context),
externalImage(std::make_shared<MetalExternalImage>(
MetalExternalImage::createFromImage(context, image))) {
texture = externalImage->getMtlTexture();
// Drops every Metal object held by this texture (the texture itself, its swizzled and LOD views
// and the MSAA sidecar), clears the external image, and marks the texture as terminated so that
// checkUseAfterFree() can detect later use.
void MetalTexture::terminate() noexcept {
texture = nil;
swizzledTextureView = nil;
lodTextureView = nil;
msaaSidecar = nil;
externalImage.set(nullptr);
terminated = true;
}
MetalTexture::MetalTexture(MetalContext& context, TextureFormat format, uint32_t width,
uint32_t height, TextureUsage usage, CVPixelBufferRef image, uint32_t plane) noexcept
: HwTexture(SamplerType::SAMPLER_EXTERNAL, 1, 1, width, height, 1, format, usage),
context(context),
externalImage(std::make_shared<MetalExternalImage>(
MetalExternalImage::createFromImagePlane(context, image, plane))) {
texture = externalImage->getMtlTexture();
// Clears the external image by setting it to null; the remaining members are released by their
// own destructors.
MetalTexture::~MetalTexture() {
externalImage.set(nullptr);
}
id<MTLTexture> MetalTexture::getMtlTextureForRead() const noexcept {
return swizzledTextureView ? swizzledTextureView : texture;
// Returns the texture view to bind for shader reads, honoring both swizzle and the LOD range.
// Returns nil if there is no underlying texture.
id<MTLTexture> MetalTexture::getMtlTextureForRead() noexcept {
    // The swizzle never changes after construction, but the LOD range does. The LOD view is
    // therefore cached here and dropped (set to nil) whenever minLod or maxLod is updated.
    if (!lodTextureView) {
        id<MTLTexture> source = texture;
        if (swizzledTextureView) {
            source = swizzledTextureView;
        }
        if (!source) {
            return nil;
        }
        if (UTILS_UNLIKELY(minLod > maxLod)) {
            // No LOD is available yet. Filament should never let this happen; fall back to a view
            // that exposes level 0 only.
            lodTextureView = createTextureViewWithLodRange(source, 0, 0);
        } else {
            lodTextureView = createTextureViewWithLodRange(source, minLod, maxLod);
        }
    }
    return lodTextureView;
}
MTLPixelFormat MetalTexture::decidePixelFormat(MetalContext* context, TextureFormat format) {
@@ -774,12 +774,15 @@ void MetalTexture::loadImage(uint32_t level, MTLRegion region, PixelBufferDescri
assert_invariant(false);
}
}
extendLodRangeTo(level);
}
// Encodes a blit pass on the pending command buffer that generates the full mip chain, then
// marks every mip level of the texture as available for reading.
void MetalTexture::generateMipmaps() noexcept {
    id<MTLBlitCommandEncoder> encoder = [getPendingCommandBuffer(&context) blitCommandEncoder];
    [encoder generateMipmapsForTexture:texture];
    [encoder endEncoding];

    // All levels [0, mipmapLevelCount - 1] now contain data.
    NSUInteger const lastLevel = texture.mipmapLevelCount - 1;
    setLodRange(0, lastLevel);
}
void MetalTexture::loadSlice(uint32_t level, MTLRegion region, uint32_t byteOffset, uint32_t slice,
@@ -903,6 +906,98 @@ void MetalTexture::loadWithBlit(uint32_t level, uint32_t slice, MTLRegion region
context.blitter->blit(getPendingCommandBuffer(&context), args, "Texture upload blit");
}
// Grows the range of readable LODs so that it includes `level`.
// Must not be called while a render pass is in progress.
void MetalTexture::extendLodRangeTo(uint16_t level) {
    assert_invariant(!isInRenderPass(&context));
    if (level < minLod) {
        minLod = level;
    }
    if (level > maxLod) {
        maxLod = level;
    }
    // The cached LOD view no longer matches the range; it is recreated on the next read.
    lodTextureView = nil;
}
// Replaces the range of readable LODs with [min, max] (inclusive, min <= max).
// Must not be called while a render pass is in progress.
void MetalTexture::setLodRange(uint16_t min, uint16_t max) {
    assert_invariant(!isInRenderPass(&context));
    assert_invariant(min <= max);

    // Drop the cached LOD view; it is rebuilt lazily for the new range on the next read.
    lodTextureView = nil;
    maxLod = max;
    minLod = min;
}
// Encodes every texture/sampler slot of this group into the current argument buffer allocation
// and marks the group as finalized. Requires that an encoder has been installed.
void MetalSamplerGroup::finalize() {
    assert_invariant(encoder);

    // TODO: textures and samplers should ideally be encoded as they arrive, inside
    // setFinalizedTexture and setFinalizedSampler. Metal doesn't seem to like that, though: the
    // argument buffer ends up encoded incorrectly. This warrants more investigation.
    auto const allocation = argBuffer->getCurrentAllocation();
    [encoder setArgumentBuffer:allocation.first offset:allocation.second];

    // Slot s occupies two consecutive argument indices: 2s for the texture, 2s + 1 for the sampler.
    for (size_t slot = 0; slot < size; ++slot) {
        [encoder setTexture:textures[slot] atIndex:(slot * 2 + 0)];
        [encoder setSamplerState:samplers[slot] atIndex:(slot * 2 + 1)];
    }

    finalized = true;
}
// Installs a new argument encoder, obtains argument buffer storage that fits it, clears every
// texture handle / texture / sampler slot and leaves the group unfinalized.
void MetalSamplerGroup::reset(id<MTLCommandBuffer> cmdBuffer, id<MTLArgumentEncoder> e,
        id<MTLDevice> device) {
    // Number of slots in the ring buffer that manages argument buffer allocations.
    // The value was picked so that a SamplerGroup updated several times per frame does not run out
    // of slots and fall back to allocating a separate buffer. It can be reduced after auditing
    // Filament's calls to updateSamplerGroup, which should happen as rarely as possible. For
    // example, the bloom downsample pass should keep two separate MaterialInstances instead of
    // "ping ponging" between two texture bindings, which updates a single SamplerGroup many times
    // per frame.
    static constexpr auto METAL_ARGUMENT_BUFFER_SLOTS = 32;

    encoder = e;

    MTLSizeAndAlign requiredLayout;
    requiredLayout.size = encoder.encodedLength;
    requiredLayout.align = encoder.alignment;

    // A different MTLArgumentEncoder usually still has the same size and alignment requirements,
    // in which case the existing ring buffer is kept and only a new allocation is taken from it.
    bool const canReuse = argBuffer && argBuffer->canAccomodateLayout(requiredLayout);
    if (UTILS_UNLIKELY(!canReuse)) {
        argBuffer = std::make_unique<MetalRingBuffer>(device, MTLResourceStorageModeShared,
                requiredLayout, METAL_ARGUMENT_BUFFER_SLOTS);
    } else {
        argBuffer->createNewAllocation(cmdBuffer);
    }

    // Wipe all slots; the three vectors are always the same length.
    assert_invariant(textureHandles.size() == textures.size());
    assert_invariant(textures.size() == samplers.size());
    size_t const slotCount = textureHandles.size();
    for (size_t slot = 0; slot < slotCount; ++slot) {
        textureHandles[slot] = {};
        textures[slot] = nil;
        samplers[slot] = nil;
    }

    finalized = false;
}
// Makes a finalized group writable again while keeping its current textures and samplers: a new
// argument buffer allocation is taken from the ring buffer and the group becomes unfinalized.
void MetalSamplerGroup::mutate(id<MTLCommandBuffer> cmdBuffer) {
    // Mutating is only meaningful for a group that has already been finalized.
    assert_invariant(finalized);
    assert_invariant(argBuffer);

    finalized = false;
    argBuffer->createNewAllocation(cmdBuffer);
}
// Declares all textures of this (finalized) group as read/sampled resources on the given render
// pass encoder.
void MetalSamplerGroup::useResources(id<MTLRenderCommandEncoder> renderPassEncoder) {
    assert_invariant(finalized);

    MTLResourceUsage const usage = MTLResourceUsageRead | MTLResourceUsageSample;
    NSUInteger const count = textures.size();

    if (@available(iOS 13, *)) {
        // TODO: pass only the appropriate stages to useResources.
        MTLRenderStages const stages = MTLRenderStageFragment | MTLRenderStageVertex;
        [renderPassEncoder useResources:textures.data() count:count usage:usage stages:stages];
        return;
    }

    // Older OS versions only offer the variant without a stage mask.
    [renderPassEncoder useResources:textures.data() count:count usage:usage];
}
MetalRenderTarget::MetalRenderTarget(MetalContext* context, uint32_t width, uint32_t height,
uint8_t samples, Attachment colorAttachments[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT],
Attachment depthAttachment, Attachment stencilAttachment) :
@@ -1248,195 +1343,5 @@ FenceStatus MetalFence::wait(uint64_t timeoutNs) {
return FenceStatus::ERROR;
}
// Takes ownership of the layout description and precomputes how many of its bindings carry the
// DYNAMIC_OFFSET flag.
MetalDescriptorSetLayout::MetalDescriptorSetLayout(DescriptorSetLayout&& l) noexcept
        : mLayout(std::move(l)) {
    auto const usesDynamicOffset = [](auto const& binding) {
        return any(binding.flags & DescriptorFlags::DYNAMIC_OFFSET);
    };
    mDynamicOffsetCount = static_cast<size_t>(
            std::count_if(mLayout.bindings.begin(), mLayout.bindings.end(), usesDynamicOffset));
}
// Returns an argument encoder for the given shader stage, built for the given texture types.
//
// One encoder is cached per stage together with the texture types it was created for. The cached
// encoder is returned only when textureTypes matches the cached types exactly (same length and
// same elements); otherwise a new encoder is created and replaces the cache entry.
id<MTLArgumentEncoder> MetalDescriptorSetLayout::getArgumentEncoder(id<MTLDevice> device,
        ShaderStage stage, utils::FixedCapacityVector<MTLTextureType> const& textureTypes) {
    auto const index = static_cast<size_t>(stage);
    assert_invariant(index < mCachedArgumentEncoder.size());

    auto const& cachedTypes = mCachedTextureTypes[index];
    // Use the four-iterator overload of std::equal: the three-iterator form does not compare
    // lengths, so it would accept a mere prefix match and read past the end of cachedTypes when
    // textureTypes is the longer of the two.
    bool const cacheHit = mCachedArgumentEncoder[index] &&
            std::equal(textureTypes.begin(), textureTypes.end(),
                    cachedTypes.begin(), cachedTypes.end());
    if (cacheHit) {
        return mCachedArgumentEncoder[index];
    }

    mCachedArgumentEncoder[index] = getArgumentEncoderSlow(device, stage, textureTypes);
    mCachedTextureTypes[index] = textureTypes;
    return mCachedArgumentEncoder[index];
}
// Builds a new MTLArgumentEncoder describing every binding of this layout that is visible to the
// given shader stage.
//
// Argument index scheme: a binding with number b uses argument index 2b for its buffer or
// texture, and a sampler binding additionally uses 2b + 1 for its sampler state.
//
// textureTypes is consumed sequentially, one entry per sampler binding visible to `stage`, in
// binding order. Sampler bindings without a corresponding entry default to MTLTextureType2D.
id<MTLArgumentEncoder> MetalDescriptorSetLayout::getArgumentEncoderSlow(id<MTLDevice> device,
        ShaderStage stage, utils::FixedCapacityVector<MTLTextureType> const& textureTypes) {
    auto const& bindings = getBindings();
    NSMutableArray<MTLArgumentDescriptor*>* arguments = [NSMutableArray new];

    // Important! The bindings must be sorted by binding number. This has already been done inside
    // createDescriptorSetLayout.
    size_t textureIndex = 0;
    for (auto const& binding : bindings) {
        if (!hasShaderType(binding.stageFlags, stage)) {
            continue;
        }
        switch (binding.type) {
            case DescriptorType::UNIFORM_BUFFER:
            case DescriptorType::SHADER_STORAGE_BUFFER: {
                MTLArgumentDescriptor* bufferArgument = [MTLArgumentDescriptor argumentDescriptor];
                bufferArgument.index = binding.binding * 2;
                bufferArgument.dataType = MTLDataTypePointer;
                bufferArgument.access = MTLArgumentAccessReadOnly;
                [arguments addObject:bufferArgument];
                break;
            }
            case DescriptorType::SAMPLER:
            case DescriptorType::SAMPLER_EXTERNAL: {
                MTLArgumentDescriptor* textureArgument = [MTLArgumentDescriptor argumentDescriptor];
                textureArgument.index = binding.binding * 2;
                textureArgument.dataType = MTLDataTypeTexture;
                MTLTextureType textureType = MTLTextureType2D;
                if (textureIndex < textureTypes.size()) {
                    textureType = textureTypes[textureIndex++];
                }
                textureArgument.textureType = textureType;
                textureArgument.access = MTLArgumentAccessReadOnly;
                [arguments addObject:textureArgument];

                MTLArgumentDescriptor* samplerArgument = [MTLArgumentDescriptor argumentDescriptor];
                samplerArgument.index = binding.binding * 2 + 1;
                samplerArgument.dataType = MTLDataTypeSampler;
                // Set the access on the sampler descriptor itself (this previously re-assigned
                // textureArgument.access by mistake).
                samplerArgument.access = MTLArgumentAccessReadOnly;
                [arguments addObject:samplerArgument];
                break;
            }
            case DescriptorType::INPUT_ATTACHMENT:
                // TODO: support INPUT_ATTACHMENT
                assert_invariant(false);
                break;
        }
    }
    return [device newArgumentEncoderWithArguments:arguments];
}
// Creates an empty descriptor set for the given layout. Only the pointer is stored; no buffers
// or textures are bound yet.
MetalDescriptorSet::MetalDescriptorSet(MetalDescriptorSetLayout* layout) noexcept
: layout(layout) {}
// Declares, on the driver's current render pass encoder, every resource this descriptor set may
// reference: the context's empty buffer and empty texture (the placeholders encoded for unbound
// bindings), followed by the vertex-stage and fragment-stage resource lists.
void MetalDescriptorSet::finalize(MetalDriver* driver) {
    auto const context = driver->mContext;
    auto const encoder = context->currentRenderPassEncoder;

    [encoder useResource:context->emptyBuffer usage:MTLResourceUsageRead];
    [encoder useResource:getOrCreateEmptyTexture(context) usage:MTLResourceUsageRead];

    if (@available(iOS 13.0, *)) {
        // Stage-specific variant: each list is only made resident for the stage that reads it.
        [encoder useResources:vertexResources.data()
                        count:vertexResources.size()
                        usage:MTLResourceUsageRead
                       stages:MTLRenderStageVertex];
        [encoder useResources:fragmentResources.data()
                        count:fragmentResources.size()
                        usage:MTLResourceUsageRead
                       stages:MTLRenderStageFragment];
        return;
    }

    // Older OS versions only offer the variant without a stage mask.
    [encoder useResources:vertexResources.data()
                    count:vertexResources.size()
                    usage:MTLResourceUsageRead];
    [encoder useResources:fragmentResources.data()
                    count:fragmentResources.size()
                    usage:MTLResourceUsageRead];
}
// Returns the argument buffer holding this descriptor set's bindings for the given shader stage.
//
// The buffer is created and encoded the first time it is requested for a stage and is then
// served from cachedBuffer. Bindings that have no buffer/texture bound are encoded with the
// context's empty buffer, or its empty texture plus a default sampler state.
//
// Argument index scheme (must match the layout's argument encoder): binding b uses index 2b for
// its buffer or texture, and 2b + 1 for its sampler state.
id<MTLBuffer> MetalDescriptorSet::finalizeAndGetBuffer(MetalDriver* driver, ShaderStage stage) {
    auto const index = static_cast<size_t>(stage);
    assert_invariant(index < cachedBuffer.size());
    auto& buffer = cachedBuffer[index];

    if (buffer) {
        return buffer.get();
    }

    // Collect the texture type of every sampler binding visible to this stage, in binding order.
    // Only sampler bindings contribute an entry: the layout's argument encoder consumes this list
    // sequentially, one entry per sampler binding, so adding entries for buffer bindings would
    // shift every following texture type onto the wrong binding.
    auto const& bindings = layout->getBindings();
    auto textureTypes = utils::FixedCapacityVector<MTLTextureType>::with_capacity(bindings.size());
    for (auto const& binding : bindings) {
        if (!hasShaderType(binding.stageFlags, stage)) {
            continue;
        }
        bool const isSampler = binding.type == DescriptorType::SAMPLER ||
                binding.type == DescriptorType::SAMPLER_EXTERNAL;
        if (!isSampler) {
            continue;
        }
        // Unbound samplers are encoded with the empty texture below; describe them as 2D.
        MTLTextureType textureType = MTLTextureType2D;
        if (auto found = textures.find(binding.binding); found != textures.end()) {
            textureType = found->second.texture.textureType;
        }
        textureTypes.push_back(textureType);
    }

    MetalContext const& context = *driver->mContext;
    id<MTLArgumentEncoder> encoder =
            layout->getArgumentEncoder(context.device, stage, textureTypes);

    {
        ScopedAllocationTimer timer("descriptor_set");
        buffer = { [context.device newBufferWithLength:encoder.encodedLength
                                               options:MTLResourceStorageModeShared],
            TrackedMetalBuffer::Type::DESCRIPTOR_SET };
    }
    [encoder setArgumentBuffer:buffer.get() offset:0];

    for (auto const& binding : bindings) {
        if (!hasShaderType(binding.stageFlags, stage)) {
            continue;
        }
        switch (binding.type) {
            case DescriptorType::UNIFORM_BUFFER:
            case DescriptorType::SHADER_STORAGE_BUFFER: {
                auto found = buffers.find(binding.binding);
                if (found == buffers.end()) {
                    // Nothing bound: encode the placeholder buffer.
                    [encoder setBuffer:driver->mContext->emptyBuffer
                                offset:0
                               atIndex:binding.binding * 2];
                    break;
                }
                auto const& bufferBinding = found->second;
                [encoder setBuffer:bufferBinding.buffer
                            offset:bufferBinding.offset
                           atIndex:binding.binding * 2];
                break;
            }
            case DescriptorType::SAMPLER:
            case DescriptorType::SAMPLER_EXTERNAL: {
                auto found = textures.find(binding.binding);
                if (found == textures.end()) {
                    // Nothing bound: encode the placeholder texture with a default sampler state.
                    [encoder setTexture:driver->mContext->emptyTexture atIndex:binding.binding * 2];
                    id<MTLSamplerState> sampler =
                            driver->mContext->samplerStateCache.getOrCreateState({});
                    [encoder setSamplerState:sampler atIndex:binding.binding * 2 + 1];
                    break;
                }
                auto const& textureBinding = found->second;
                [encoder setTexture:textureBinding.texture atIndex:binding.binding * 2];
                SamplerState samplerState { .samplerParams = textureBinding.sampler };
                id<MTLSamplerState> sampler =
                        driver->mContext->samplerStateCache.getOrCreateState(samplerState);
                [encoder setSamplerState:sampler atIndex:binding.binding * 2 + 1];
                break;
            }
            case DescriptorType::INPUT_ATTACHMENT:
                // Input attachments are not supported.
                assert_invariant(false);
                break;
        }
    }

    return buffer.get();
}
} // namespace backend
} // namespace filament

View File

@@ -83,10 +83,6 @@ public:
return std::get<Compute>(mPrograms);
}
bool isRaster() const { return std::holds_alternative<Raster>(mPrograms); }
bool isCompute() const { return std::holds_alternative<Compute>(mPrograms); }
static MetalFunctionBundle none() {
return MetalFunctionBundle(None{});
}

View File

@@ -28,35 +28,37 @@
#include <memory>
#include <tsl/robin_map.h>
#include <utils/Hash.h>
#include <utils/Invocable.h>
namespace filament {
namespace backend {
// Equality for SamplerParams, delegating to the type's own EqualTo functor.
inline bool operator==(const SamplerParams& lhs, const SamplerParams& rhs) {
return SamplerParams::EqualTo{}(lhs, rhs);
}
// Rasterization Bindings
// ----------------------
// Bindings Buffer name Count
// ------------------------------------------------------
// 0 Zero buffer (placeholder vertex buffer) 1
// 1-16 Filament vertex buffers 16 limited by MAX_VERTEX_BUFFER_COUNT
// 20 Push constants 1
// 21-24 Descriptor sets (argument buffers) 4 limited by MAX_DESCRIPTOR_SET_COUNT
// 25 Dynamic offset buffer 1
// 17-25 Uniform buffers 9 Program::UNIFORM_BINDING_COUNT
// 26 Push constants 1
// 27-30 Sampler groups (argument buffers) 4 Program::SAMPLER_BINDING_COUNT
//
// Total 23
// Total 31
// Compute Bindings
// ----------------------
// Bindings Buffer name Count
// ------------------------------------------------------
// 0-3 SSBO buffers 4 MAX_SSBO_COUNT
// 20 Push constants 1
// 21-24 Descriptor sets (argument buffers) 4 limited by MAX_DESCRIPTOR_SET_COUNT
// 25 Dynamic offset buffer 1
// 17-25 Uniform buffers 9 Program::UNIFORM_BINDING_COUNT
// 26 Push constants 1
// 27-30 Sampler groups (argument buffers) 4 Program::SAMPLER_BINDING_COUNT
//
// Total 10
// Total 18
// The total number of vertex buffer "slots" that the Metal backend can bind.
// + 1 to account for the zero buffer, a placeholder buffer used internally by the Metal backend.
@@ -69,11 +71,10 @@ static constexpr uint32_t ZERO_VERTEX_BUFFER_BINDING = 0u;
static constexpr uint32_t USER_VERTEX_BUFFER_BINDING_START = 1u;
// These constants must match the equivalent in CodeGenerator.h.
static constexpr uint32_t PUSH_CONSTANT_BUFFER_INDEX = 20u;
static constexpr uint32_t DESCRIPTOR_SET_BINDING_START = 21u;
static constexpr uint32_t DYNAMIC_OFFSET_BINDING = 25u;
static constexpr uint32_t UNIFORM_BUFFER_BINDING_START = 17u;
static constexpr uint32_t SSBO_BINDING_START = 0u;
static constexpr uint32_t SAMPLER_GROUP_BINDING_START = 27u;
// Forward declarations necessary here, definitions at end of file.
inline bool operator==(const MTLViewport& lhs, const MTLViewport& rhs);
@@ -162,8 +163,6 @@ template<typename StateType,
typename HashFn = utils::hash::MurmurHashFn<StateType>>
class StateCache {
using MapType = tsl::robin_map<StateType, MetalType, HashFn>;
public:
StateCache() = default;
@@ -173,18 +172,6 @@ public:
void setDevice(id<MTLDevice> device) noexcept { mDevice = device; }
// Erases every cached entry whose key makes the predicate `fn` return true.
void removeIf(utils::Invocable<bool(const StateType&)> fn) noexcept {
    for (typename MapType::const_iterator cursor = mStateCache.begin();
            cursor != mStateCache.end();) {
        const StateType& cachedKey = cursor->first;
        if (UTILS_UNLIKELY(fn(cachedKey))) {
            // erase() returns the iterator to continue from.
            cursor = mStateCache.erase(cursor);
            continue;
        }
        ++cursor;
    }
}
MetalType getOrCreateState(const StateType& state) noexcept {
assert_invariant(mDevice);
@@ -212,7 +199,7 @@ private:
StateCreator creator;
id<MTLDevice> mDevice = nil;
MapType mStateCache;
tsl::robin_map<StateType, MetalType, HashFn> mStateCache;
};
@@ -220,8 +207,9 @@ private:
// Different kinds of state, like pipeline state, uniform buffer state, etc., are passed to the
// current Metal command encoder and persist throughout the lifetime of the encoder (a frame).
// StateTracker is used to prevent calling redundant state change methods.
template <typename StateType, typename StateEqual = std::equal_to<StateType>>
template<typename StateType>
class StateTracker {
public:
// Call to force the state to dirty at the beginning of each frame, as all state must be
@@ -229,7 +217,7 @@ public:
void invalidate() noexcept { mStateDirty = true; }
void updateState(const StateType& newState) noexcept {
if (!StateEqual()(mCurrentState, newState)) {
if (mCurrentState != newState) {
mCurrentState = newState;
mStateDirty = true;
}
@@ -250,6 +238,7 @@ private:
bool mStateDirty = true;
StateType mCurrentState = {};
};
// Pipeline state
@@ -357,16 +346,6 @@ using DepthStencilStateTracker = StateTracker<DepthStencilState>;
using DepthStencilStateCache = StateCache<DepthStencilState, id<MTLDepthStencilState>,
DepthStateCreator>;
// Equality functor for MTLScissorRect: two rectangles compare equal when their
// height, width, x and y fields are all equal.
struct MtlScissorRectEqual {
    bool operator()(const MTLScissorRect& lhs, const MTLScissorRect& rhs) const {
        if (lhs.height != rhs.height || lhs.width != rhs.width) {
            return false;
        }
        return lhs.x == rhs.x && lhs.y == rhs.y;
    }
};
using ScissorRectStateTracker = StateTracker<MTLScissorRect, MtlScissorRectEqual>;
// Uniform buffers
class MetalBufferObject;
@@ -408,17 +387,14 @@ using DepthClampStateTracker = StateTracker<MTLDepthClipMode>;
// Argument encoder
struct ArgumentEncoderState {
NSUInteger bufferCount;
utils::FixedCapacityVector<MTLTextureType> textureTypes;
explicit ArgumentEncoderState(
NSUInteger bufferCount, utils::FixedCapacityVector<MTLTextureType>&& types)
: bufferCount(bufferCount), textureTypes(std::move(types)) {}
explicit ArgumentEncoderState(utils::FixedCapacityVector<MTLTextureType>&& types)
: textureTypes(std::move(types)) {}
bool operator==(const ArgumentEncoderState& rhs) const noexcept {
return std::equal(textureTypes.begin(), textureTypes.end(), rhs.textureTypes.begin(),
rhs.textureTypes.end()) &&
bufferCount == rhs.bufferCount;
rhs.textureTypes.end());
}
bool operator!=(const ArgumentEncoderState& rhs) const noexcept {
@@ -440,30 +416,6 @@ struct ArgumentEncoderCreator {
using ArgumentEncoderCache = StateCache<ArgumentEncoderState, id<MTLArgumentEncoder>,
ArgumentEncoderCreator, ArgumentEncoderHasher>;
// Holds N buffer/offset pairs together with two dirty bitsets (one for buffers, one for
// offsets). `stage` selects which encoder calls bindBuffers() issues.
template <NSUInteger N, ShaderStage stage>
class MetalBufferBindings {
public:
    // A fresh instance starts with every slot marked dirty.
    MetalBufferBindings() { invalidate(); }

    // Marks the buffer and the offset of each of the N slots as dirty.
    void invalidate() {
        mDirtyBuffers.reset();
        mDirtyOffsets.reset();
        for (size_t slot = 0; slot < N; ++slot) {
            mDirtyBuffers.set(slot, true);
            mDirtyOffsets.set(slot, true);
        }
    }

    // Stores a buffer and offset for slot `index`.
    void setBuffer(const id<MTLBuffer> buffer, NSUInteger offset, NSUInteger index);

    // Issues the encoder calls for dirty slots; slot i is bound at `startIndex + i`.
    void bindBuffers(id<MTLCommandEncoder> encoder, NSUInteger startIndex);

private:
    // The dirty sets are bitset8, so at most 8 slots can be tracked.
    static_assert(N <= 8);

    std::array<__weak id<MTLBuffer>, N> mBuffers = { nil };
    std::array<NSUInteger, N> mOffsets = { 0 };
    utils::bitset8 mDirtyBuffers;
    utils::bitset8 mDirtyOffsets;
};
} // namespace backend
} // namespace filament

View File

@@ -166,40 +166,28 @@ id<MTLSamplerState> SamplerStateCreator::operator()(id<MTLDevice> device,
id<MTLArgumentEncoder> ArgumentEncoderCreator::operator()(id<MTLDevice> device,
const ArgumentEncoderState &state) noexcept {
const auto& textureTypes = state.textureTypes;
const auto& textureCount = textureTypes.size();
const auto& bufferCount = state.bufferCount;
assert_invariant(textureCount > 0);
const auto& count = textureTypes.size();
assert_invariant(count > 0);
// Metal has separate data types for textures versus samplers, so the argument buffer layout
// alternates between texture and sampler, i.e.:
// buffer0
// buffer1
// textureA
// samplerA
// textureB
// samplerB
// etc
NSMutableArray<MTLArgumentDescriptor*>* arguments =
[NSMutableArray arrayWithCapacity:(bufferCount + textureCount * 2)];
size_t i = 0;
for (size_t j = 0; j < bufferCount; j++) {
MTLArgumentDescriptor* bufferArgument = [MTLArgumentDescriptor argumentDescriptor];
bufferArgument.index = i++;
bufferArgument.dataType = MTLDataTypePointer;
bufferArgument.access = MTLArgumentAccessReadOnly;
[arguments addObject:bufferArgument];
}
for (size_t j = 0; j < textureCount; j++) {
[NSMutableArray arrayWithCapacity:(count * 2)];
for (size_t i = 0; i < count; i++) {
MTLArgumentDescriptor* textureArgument = [MTLArgumentDescriptor argumentDescriptor];
textureArgument.index = i++;
textureArgument.index = i * 2 + 0;
textureArgument.dataType = MTLDataTypeTexture;
textureArgument.textureType = textureTypes[i];
textureArgument.access = MTLArgumentAccessReadOnly;
[arguments addObject:textureArgument];
MTLArgumentDescriptor* samplerArgument = [MTLArgumentDescriptor argumentDescriptor];
samplerArgument.index = i++;
samplerArgument.index = i * 2 + 1;
samplerArgument.dataType = MTLDataTypeSampler;
textureArgument.access = MTLArgumentAccessReadOnly;
[arguments addObject:samplerArgument];
@@ -208,64 +196,5 @@ id<MTLArgumentEncoder> ArgumentEncoderCreator::operator()(id<MTLDevice> device,
return [device newArgumentEncoderWithArguments:arguments];
}
// Records `buffer` and `offset` for slot `index`. A slot is flagged dirty only when the
// stored value actually changes, so that bindBuffers() can skip redundant encoder calls.
//
// buffer: the Metal buffer to associate with the slot
// offset: byte offset into `buffer`
// index:  slot number, must be less than N
template <NSUInteger N, ShaderStage stage>
void MetalBufferBindings<N, stage>::setBuffer(const id<MTLBuffer> buffer, NSUInteger offset, NSUInteger index) {
    // It is the slot index that indexes mBuffers/mOffsets and must stay in range; the
    // byte offset is unrelated to N.
    assert_invariant(index < N);

    if (mBuffers[index] != buffer) {
        mBuffers[index] = buffer;
        mDirtyBuffers.set(index);
    }

    if (mOffsets[index] != offset) {
        mOffsets[index] = offset;
        mDirtyOffsets.set(index);
    }
}
// Pushes the dirty slots to `encoder`, binding slot i at index `startIndex + i`, then
// clears the dirty sets. Slots whose buffer is unchanged only get an offset update;
// slots whose buffer changed get a full buffer + offset bind. The encoder is cast to a
// render or compute encoder according to `stage`.
template <NSUInteger N, ShaderStage stage>
void MetalBufferBindings<N, stage>::bindBuffers(
        id<MTLCommandEncoder> encoder, NSUInteger startIndex) {
    const bool nothingDirty = mDirtyBuffers.none() && mDirtyOffsets.none();
    if (nothingDirty) {
        return;
    }

    // Offset dirty, buffer clean: the cheaper offset-only setters are enough.
    utils::bitset8 offsetOnlySlots = mDirtyOffsets & ~mDirtyBuffers;
    offsetOnlySlots.forEachSetBit([&](size_t slot) {
        const auto bindIndex = slot + startIndex;
        if constexpr (stage == ShaderStage::VERTEX) {
            [(id<MTLRenderCommandEncoder>)encoder setVertexBufferOffset:mOffsets[slot]
                                                                atIndex:bindIndex];
        } else if constexpr (stage == ShaderStage::FRAGMENT) {
            [(id<MTLRenderCommandEncoder>)encoder setFragmentBufferOffset:mOffsets[slot]
                                                                  atIndex:bindIndex];
        } else if constexpr (stage == ShaderStage::COMPUTE) {
            [(id<MTLComputeCommandEncoder>)encoder setBufferOffset:mOffsets[slot]
                                                           atIndex:bindIndex];
        }
    });
    mDirtyOffsets.reset();

    // Buffer dirty: rebind the buffer together with its current offset.
    mDirtyBuffers.forEachSetBit([&](size_t slot) {
        const auto bindIndex = slot + startIndex;
        if constexpr (stage == ShaderStage::VERTEX) {
            [(id<MTLRenderCommandEncoder>)encoder setVertexBuffer:mBuffers[slot]
                                                           offset:mOffsets[slot]
                                                          atIndex:bindIndex];
        } else if constexpr (stage == ShaderStage::FRAGMENT) {
            [(id<MTLRenderCommandEncoder>)encoder setFragmentBuffer:mBuffers[slot]
                                                             offset:mOffsets[slot]
                                                            atIndex:bindIndex];
        } else if constexpr (stage == ShaderStage::COMPUTE) {
            [(id<MTLComputeCommandEncoder>)encoder setBuffer:mBuffers[slot]
                                                      offset:mOffsets[slot]
                                                     atIndex:bindIndex];
        }
    });
    mDirtyBuffers.reset();
}
template class MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::VERTEX>;
template class MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::FRAGMENT>;
template class MetalBufferBindings<MAX_DESCRIPTOR_SET_COUNT, ShaderStage::COMPUTE>;
} // namespace backend
} // namespace filament

View File

@@ -54,7 +54,7 @@ void NoopDriver::beginFrame(int64_t monotonic_clock_ns,
}
void NoopDriver::setFrameScheduledCallback(Handle<HwSwapChain> sch,
CallbackHandler* handler, FrameScheduledCallback&& callback, uint64_t flags) {
CallbackHandler* handler, FrameScheduledCallback&& callback) {
}
@@ -99,6 +99,9 @@ void NoopDriver::destroyProgram(Handle<HwProgram> ph) {
// No-op backend: the render target handle is ignored and nothing is released.
void NoopDriver::destroyRenderTarget(Handle<HwRenderTarget> rth) {
}
// No-op backend: the sampler group handle is ignored and nothing is released.
void NoopDriver::destroySamplerGroup(Handle<HwSamplerGroup> sbh) {
}
// No-op backend: the swap chain handle is ignored and nothing is released.
void NoopDriver::destroySwapChain(Handle<HwSwapChain> sch) {
}
@@ -108,12 +111,6 @@ void NoopDriver::destroyStream(Handle<HwStream> sh) {
// No-op backend: the timer query handle is ignored and nothing is released.
void NoopDriver::destroyTimerQuery(Handle<HwTimerQuery> tqh) {
}
void NoopDriver::destroyDescriptorSetLayout(Handle<HwDescriptorSetLayout> tqh) {
}
void NoopDriver::destroyDescriptorSet(Handle<HwDescriptorSet> tqh) {
}
// No-op backend: ignores `nativeStream` and returns a default-constructed stream handle.
Handle<HwStream> NoopDriver::createStreamNative(void* nativeStream) {
return {};
}
@@ -251,6 +248,9 @@ void NoopDriver::setVertexBufferObject(Handle<HwVertexBuffer> vbh, uint32_t inde
Handle<HwBufferObject> boh) {
}
// No-op backend: the requested mip level range is ignored.
void NoopDriver::setMinMaxLevels(Handle<HwTexture> th, uint32_t minLevel, uint32_t maxLevel) {
}
void NoopDriver::update3DImage(Handle<HwTexture> th,
uint32_t level, uint32_t xoffset, uint32_t yoffset, uint32_t zoffset,
uint32_t width, uint32_t height, uint32_t depth,
@@ -276,6 +276,11 @@ void NoopDriver::setExternalStream(Handle<HwTexture> th, Handle<HwStream> sh) {
// No-op backend: no mipmaps are generated.
void NoopDriver::generateMipmaps(Handle<HwTexture> th) { }
// No-op backend: the sampler group itself is not touched; the only work done is handing
// the incoming buffer descriptor to scheduleDestroy().
void NoopDriver::updateSamplerGroup(Handle<HwSamplerGroup> sbh,
BufferDescriptor&& data) {
scheduleDestroy(std::move(data));
}
void NoopDriver::compilePrograms(CompilerPriorityQueue priority,
CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
if (callback) {
@@ -298,14 +303,27 @@ void NoopDriver::makeCurrent(Handle<HwSwapChain> drawSch, Handle<HwSwapChain> re
// No-op backend: nothing is presented; the swap chain handle is ignored.
void NoopDriver::commit(Handle<HwSwapChain> sch) {
}
// No-op backend: the uniform buffer binding request is ignored.
void NoopDriver::bindUniformBuffer(uint32_t index, Handle<HwBufferObject> ubh) {
}
// No-op backend: the buffer range binding request is ignored.
void NoopDriver::bindBufferRange(BufferObjectBinding bindingType, uint32_t index,
Handle<HwBufferObject> ubh, uint32_t offset, uint32_t size) {
}
// No-op backend: there is no binding state to clear.
void NoopDriver::unbindBuffer(BufferObjectBinding bindingType, uint32_t index) {
}
// No-op backend: the sampler group binding request is ignored.
void NoopDriver::bindSamplers(uint32_t index, Handle<HwSamplerGroup> sbh) {
}
// No-op backend: the push constant value is discarded.
void NoopDriver::setPushConstant(backend::ShaderStage stage, uint8_t index,
backend::PushConstantVariant value) {
}
void NoopDriver::insertEventMarker(char const* string) {
void NoopDriver::insertEventMarker(char const* string, uint32_t len) {
}
void NoopDriver::pushGroupMarker(char const* string) {
void NoopDriver::pushGroupMarker(char const* string, uint32_t len) {
}
void NoopDriver::popGroupMarker(int) {
@@ -374,28 +392,4 @@ void NoopDriver::endTimerQuery(Handle<HwTimerQuery> tqh) {
// No-op backend: there is no driver state to reset; the int argument is unused.
void NoopDriver::resetState(int) {
}
// No-op backend: the buffer descriptor update is ignored.
void NoopDriver::updateDescriptorSetBuffer(
backend::DescriptorSetHandle dsh,
backend::descriptor_binding_t binding,
backend::BufferObjectHandle boh,
uint32_t offset,
uint32_t size) {
}
// No-op backend: the texture/sampler descriptor update is ignored.
void NoopDriver::updateDescriptorSetTexture(
backend::DescriptorSetHandle dsh,
backend::descriptor_binding_t binding,
backend::TextureHandle th,
SamplerParams params) {
}
// No-op backend: the descriptor set binding request (and its dynamic offsets) is ignored.
void NoopDriver::bindDescriptorSet(
backend::DescriptorSetHandle dsh,
backend::descriptor_set_t set,
backend::DescriptorSetOffsetArray&& offsets) {
}
// No-op backend: the debug tag is discarded.
void NoopDriver::setDebugTag(HandleBase::HandleId handleId, utils::CString tag) {
}
} // namespace filament

View File

@@ -1,91 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_OPENGL_BINDINGMAP_H
#define TNT_FILAMENT_BACKEND_OPENGL_BINDINGMAP_H
#include <backend/DriverEnums.h>
#include "gl_headers.h"
#include <utils/bitset.h>
#include <utils/debug.h>
#include <new>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
namespace filament::backend {
// Maps a (descriptor set, descriptor binding) pair to a GL binding index, and records,
// per descriptor set, which bindings have been inserted.
//
// Each entry is packed into one byte: 7 bits of binding index plus 1 bit telling whether
// the descriptor is a sampler. The table is heap-allocated and the class is neither
// copyable nor movable.
class BindingMap {
    struct CompressedBinding {
        // this is in fact a GLuint, but we only want 8-bits
        uint8_t binding : 7;
        uint8_t sampler : 1;
    };

    // [MAX_DESCRIPTOR_SET_COUNT][MAX_DESCRIPTOR_COUNT] table; null if the allocation failed.
    CompressedBinding (*mStorage)[MAX_DESCRIPTOR_COUNT];

    // One bit per binding that insert() has been called for, per descriptor set.
    utils::bitset64 mActiveDescriptors[MAX_DESCRIPTOR_SET_COUNT];

public:
    BindingMap() noexcept
            : mStorage(new (std::nothrow) CompressedBinding[MAX_DESCRIPTOR_SET_COUNT][MAX_DESCRIPTOR_COUNT]) {
#ifndef NDEBUG
        // The nothrow allocation may return null; passing null to memset is undefined
        // behavior, so only fill the table when it exists.
        if (mStorage) {
            memset(mStorage, 0xFF, sizeof(CompressedBinding[MAX_DESCRIPTOR_SET_COUNT][MAX_DESCRIPTOR_COUNT]));
        }
#endif
    }

    ~BindingMap() noexcept {
        delete [] mStorage;
    }

    BindingMap(BindingMap const&) noexcept = delete;
    BindingMap(BindingMap&&) noexcept = delete;
    BindingMap& operator=(BindingMap const&) noexcept = delete;
    BindingMap& operator=(BindingMap&&) noexcept = delete;

    struct Binding {
        GLuint binding;
        DescriptorType type;
    };

    // Stores `entry` for (set, binding) and marks that binding active in the set.
    void insert(descriptor_set_t set, descriptor_binding_t binding, Binding entry) noexcept {
        assert_invariant(mStorage);
        assert_invariant(set < MAX_DESCRIPTOR_SET_COUNT);
        assert_invariant(binding < MAX_DESCRIPTOR_COUNT);
        assert_invariant(entry.binding < 128); // we reserve 1 bit for the type right now
        mStorage[set][binding] = { (uint8_t)entry.binding,
                                   entry.type == DescriptorType::SAMPLER ||
                                   entry.type == DescriptorType::SAMPLER_EXTERNAL };
        mActiveDescriptors[set].set(binding);
    }

    // Returns the GL binding index stored for (set, binding).
    GLuint get(descriptor_set_t set, descriptor_binding_t binding) const noexcept {
        assert_invariant(mStorage);
        assert_invariant(set < MAX_DESCRIPTOR_SET_COUNT);
        assert_invariant(binding < MAX_DESCRIPTOR_COUNT);
        return mStorage[set][binding].binding;
    }

    // Returns the set of bindings inserted so far for descriptor set `set`.
    utils::bitset64 getActiveDescriptors(descriptor_set_t set) const noexcept {
        return mActiveDescriptors[set];
    }
};
} // namespace filament::backend
#endif //TNT_FILAMENT_BACKEND_OPENGL_BINDINGMAP_H

View File

@@ -1,364 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "GLDescriptorSet.h"
#include "GLBufferObject.h"
#include "GLDescriptorSetLayout.h"
#include "GLTexture.h"
#include "GLUtils.h"
#include "OpenGLDriver.h"
#include "OpenGLContext.h"
#include "OpenGLProgram.h"
#include "gl_headers.h"
#include <private/backend/HandleAllocator.h>
#include <backend/DriverEnums.h>
#include <backend/Handle.h>
#include <utils/BitmaskEnum.h>
#include <utils/Log.h>
#include <utils/Panic.h>
#include <utils/bitset.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <algorithm>
#include <type_traits>
#include <utility>
#include <variant>
#include <stddef.h>
#include <stdint.h>
namespace filament::backend {
// Builds the descriptor storage for a set created from `layout`.
//
// One slot is allocated per binding index up to layout->maxDescriptorBinding. For every
// binding declared in the layout, the slot's variant is switched to the alternative that
// matches the descriptor type, the dynamic-offset flag and whether the context is ES2.
// Dynamic-offset buffers are also recorded in `dynamicBuffers` and counted.
GLDescriptorSet::GLDescriptorSet(OpenGLContext& gl, DescriptorSetLayoutHandle dslh,
        GLDescriptorSetLayout const* layout) noexcept
        : descriptors(layout->maxDescriptorBinding + 1),
          dslh(std::move(dslh)) {
    for (auto const& entry : layout->bindings) {
        size_t const index = entry.binding;
        auto& desc = descriptors[index].desc;

        switch (entry.type) {
            case DescriptorType::UNIFORM_BUFFER: {
                // Uniform buffers: dynamic or static offset, with a separate ES2 flavor.
                bool const dynamicOffset = any(entry.flags & DescriptorFlags::DYNAMIC_OFFSET);
                dynamicBuffers.set(index, dynamicOffset);
                if (dynamicOffset) {
                    dynamicBufferCount++;
                }
                if (UTILS_UNLIKELY(gl.isES2())) {
                    desc.emplace<BufferGLES2>(dynamicOffset);
                } else {
                    auto const type = GLUtils::getBufferBindingType(BufferObjectBinding::UNIFORM);
                    if (dynamicOffset) {
                        desc.emplace<DynamicBuffer>(type);
                    } else {
                        desc.emplace<Buffer>(type);
                    }
                }
                break;
            }
            case DescriptorType::SHADER_STORAGE_BUFFER: {
                // Shader storage buffers have no ES2 flavor: dynamic or static only.
                bool const dynamicOffset = any(entry.flags & DescriptorFlags::DYNAMIC_OFFSET);
                dynamicBuffers.set(index, dynamicOffset);
                if (dynamicOffset) {
                    dynamicBufferCount++;
                }
                auto const type = GLUtils::getBufferBindingType(BufferObjectBinding::SHADER_STORAGE);
                if (dynamicOffset) {
                    desc.emplace<DynamicBuffer>(type);
                } else {
                    desc.emplace<Buffer>(type);
                }
                break;
            }
            case DescriptorType::SAMPLER:
            case DescriptorType::SAMPLER_EXTERNAL: {
                if (UTILS_UNLIKELY(gl.isES2())) {
                    desc.emplace<SamplerGLES2>();
                    break;
                }
                // Some drivers expose anisotropic filtering but fail to apply it through
                // sampler objects; those get a dedicated alternative.
                bool const needsAnisotropyWorkaround =
                        gl.ext.EXT_texture_filter_anisotropic &&
                        gl.bugs.texture_filter_anisotropic_broken_on_sampler;
                if (needsAnisotropyWorkaround) {
                    desc.emplace<SamplerWithAnisotropyWorkaround>();
                } else {
                    desc.emplace<Sampler>();
                }
                break;
            }
            case DescriptorType::INPUT_ATTACHMENT:
                // Nothing to set up: the slot keeps its default alternative.
                break;
        }
    }
}
// Updates the buffer descriptor at `binding` to reference `bo` (may be null) with the
// given offset and size. Panics when the descriptor at `binding` is not a buffer.
void GLDescriptorSet::update(OpenGLContext&,
        descriptor_binding_t binding, GLBufferObject* bo, size_t offset, size_t size) noexcept {
    assert_invariant(binding < descriptors.size());
    auto& slot = descriptors[binding].desc;
    std::visit([=](auto&& alt) {
        using Alt = std::decay_t<decltype(alt)>;
        constexpr bool isGLES2Buffer = std::is_same_v<Alt, BufferGLES2>;
        constexpr bool isGLBuffer =
                std::is_same_v<Alt, Buffer> || std::is_same_v<Alt, DynamicBuffer>;
        if constexpr (isGLES2Buffer) {
            // ES2 flavor keeps the buffer object pointer itself rather than a GL name.
            alt.bo = bo;
            alt.offset = uint32_t(offset);
        } else if constexpr (isGLBuffer) {
            assert_invariant(alt.target != 0);
            alt.id = bo ? bo->gl.id : 0;
            alt.offset = uint32_t(offset);
            alt.size = uint32_t(size);
            // A null buffer is only acceptable with a zero offset and size.
            assert_invariant(alt.id || (!alt.size && !offset));
        } else {
            // API usage error. User asked to update the wrong type of descriptor.
            PANIC_PRECONDITION("descriptor %d is not a buffer", +binding);
        }
    }, slot);
}
// Updates the sampler descriptor at `binding` to reference texture `t` (may be null)
// sampled with `params`. Panics when the descriptor at `binding` is not a sampler.
//
// `params` is adjusted before being stored: external textures are forced to
// CLAMP_TO_EDGE, and depth textures without a compare mode are forced to unfiltered
// (nearest) sampling.
void GLDescriptorSet::update(OpenGLContext& gl,
        descriptor_binding_t binding, GLTexture* t, SamplerParams params) noexcept {
    assert_invariant(binding < descriptors.size());
    std::visit([=, &gl](auto&& arg) mutable {
        using T = std::decay_t<decltype(arg)>;
        if constexpr (std::is_same_v<T, Sampler> ||
                      std::is_same_v<T, SamplerWithAnisotropyWorkaround> ||
                      std::is_same_v<T, SamplerGLES2>) {
            if (UTILS_UNLIKELY(t && t->target == SamplerType::SAMPLER_EXTERNAL)) {
                // From OES_EGL_image_external spec:
                // "The default s and t wrap modes are CLAMP_TO_EDGE, and it is an INVALID_ENUM
                //  error to set the wrap mode to any other value."
                params.wrapS = SamplerWrapMode::CLAMP_TO_EDGE;
                params.wrapT = SamplerWrapMode::CLAMP_TO_EDGE;
                params.wrapR = SamplerWrapMode::CLAMP_TO_EDGE;
            }
            // GLES3.x specification forbids depth textures to be filtered.
            if (t && isDepthFormat(t->format)
                    && params.compareMode == SamplerCompareMode::NONE) {
                params.filterMag = SamplerMagFilter::NEAREST;
                switch (params.filterMin) {
                    case SamplerMinFilter::LINEAR:
                        params.filterMin = SamplerMinFilter::NEAREST;
                        break;
                    case SamplerMinFilter::LINEAR_MIPMAP_NEAREST:
                    case SamplerMinFilter::NEAREST_MIPMAP_LINEAR:
                    case SamplerMinFilter::LINEAR_MIPMAP_LINEAR:
                        params.filterMin = SamplerMinFilter::NEAREST_MIPMAP_NEAREST;
                        break;
                    default:
                        break;
                }
            }

            arg.target = t ? t->gl.target : 0;
            arg.id = t ? t->gl.id : 0;
            if constexpr (std::is_same_v<T, Sampler> ||
                          std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
                if constexpr (std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
                    arg.anisotropy = float(1u << params.anisotropyLog2);
                }
                if (t) {
                    arg.ref = t->ref;
                    arg.baseLevel = t->gl.baseLevel;
                    arg.maxLevel = t->gl.maxLevel;
                    arg.swizzle = t->gl.swizzle;
                }
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
                arg.sampler = gl.getSampler(params);
#else
                (void)gl;
#endif
            } else {
                // ES2: there are no sampler objects, so the parameters are stored and
                // applied to the texture when the set is bound.
                arg.params = params;
                // bind() reads `anisotropy` for the ES2 alternative; without this store
                // it would stay at its default of 1.0 and the request would be ignored.
                arg.anisotropy = float(1u << params.anisotropyLog2);
            }
        } else {
            // API usage error. User asked to update the wrong type of descriptor.
            PANIC_PRECONDITION("descriptor %d is not a texture", +binding);
        }
    }, descriptors[binding].desc);
}
// Brings the GL texture's base/max level and swizzle in line with what the descriptor
// `desc` asks for. The last values applied are cached in the GLTextureRef referenced by
// desc.ref, so GL is only touched when something differs. Requires desc.ref to be valid.
template<typename T>
void GLDescriptorSet::updateTextureView(OpenGLContext& gl,
        HandleAllocatorGL& handleAllocator, GLuint unit, T const& desc) noexcept {
    // A ref handle only exists for textures that ever had a View on them, which is the
    // uncommon case.
    assert_invariant(desc.ref);
    GLTextureRef* const applied = handleAllocator.handle_cast<GLTextureRef*>(desc.ref);

    bool const levelsDiffer =
            desc.baseLevel != applied->baseLevel || desc.maxLevel != applied->maxLevel;
    if (UTILS_UNLIKELY(levelsDiffer)) {
        // Even with views, switching is expected to be rare. An inverted range means
        // "reset to the original texture".
        GLint const requestedBase = GLint(desc.baseLevel); // NOLINT(*-signed-char-misuse)
        GLint const requestedMax = GLint(desc.maxLevel); // NOLINT(*-signed-char-misuse)
        bool const resetToOriginal = requestedBase > requestedMax;
        GLint const baseLevel = resetToOriginal ? 0 : requestedBase;
        GLint const maxLevel = resetToOriginal ? 1000 : requestedMax; // 1000 per OpenGL spec
        // Unfortunately glTexParameteri needs the unit to be the active one.
        gl.activeTexture(unit);
        glTexParameteri(desc.target, GL_TEXTURE_BASE_LEVEL, baseLevel);
        glTexParameteri(desc.target, GL_TEXTURE_MAX_LEVEL, maxLevel);
        applied->baseLevel = desc.baseLevel;
        applied->maxLevel = desc.maxLevel;
    }

    bool const swizzleDiffers = desc.swizzle != applied->swizzle;
    if (UTILS_UNLIKELY(swizzleDiffers)) {
        using namespace GLUtils;
        gl.activeTexture(unit);
#if !defined(__EMSCRIPTEN__) && !defined(FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2)
        GLenum const channelParams[4] = {
                GL_TEXTURE_SWIZZLE_R, GL_TEXTURE_SWIZZLE_G,
                GL_TEXTURE_SWIZZLE_B, GL_TEXTURE_SWIZZLE_A };
        for (size_t channel = 0; channel < 4; ++channel) {
            glTexParameteri(desc.target, channelParams[channel],
                    (GLint)getSwizzleChannel(desc.swizzle[channel]));
        }
#endif
        applied->swizzle = desc.swizzle;
    }
}
// Applies this descriptor set to the GL context for program `p`.
//
// Only the bindings that `p` reports as active for `set` are visited. Each one is
// dispatched on the alternative stored in the descriptor: buffers are bound with
// bindBufferRange (or pushed as uniforms on ES2), samplers bind their texture and
// sampler state to the texture unit the program assigned, and a sampler without a
// texture unbinds the unit.
//
// offsets:     dynamic offsets, consumed in visiting order by dynamic-offset buffers
// offsetsOnly: when true, only the dynamic-offset buffers are re-bound
//
// NOTE(review): dynamicOffsetIndex advances only for *visited* dynamic buffers, so this
// presumably assumes every dynamic buffer of the set is active in `p` -- confirm.
void GLDescriptorSet::bind(
OpenGLContext& gl,
HandleAllocatorGL& handleAllocator,
OpenGLProgram const& p,
descriptor_set_t set, uint32_t const* offsets, bool offsetsOnly) const noexcept {
// TODO: check that offsets is sized correctly
size_t dynamicOffsetIndex = 0;
utils::bitset64 activeDescriptorBindings = p.getActiveDescriptors(set);
if (offsetsOnly) {
// restrict the visit to descriptors flagged as dynamic-offset buffers
activeDescriptorBindings &= dynamicBuffers;
}
// loop only over the active indices for this program
activeDescriptorBindings.forEachSetBit(
[this,&gl, &handleAllocator, &p, set, offsets, &dynamicOffsetIndex]
(size_t binding) {
// This would fail here if we're trying to set a descriptor that doesn't exist in the
// program. In other words, a mismatch between the program's layout and this descriptor-set.
assert_invariant(binding < descriptors.size());
auto const& entry = descriptors[binding];
std::visit(
[&gl, &handleAllocator, &p, &dynamicOffsetIndex, set, binding, offsets]
(auto&& arg) {
using T = std::decay_t<decltype(arg)>;
if constexpr (std::is_same_v<T, Buffer>) {
// static-offset buffer: bind the stored range as-is
GLuint const bindingPoint = p.getBufferBinding(set, binding);
GLintptr const offset = arg.offset;
assert_invariant(arg.id || (!arg.size && !offset));
gl.bindBufferRange(arg.target, bindingPoint, arg.id, offset, arg.size);
} else if constexpr (std::is_same_v<T, DynamicBuffer>) {
// dynamic-offset buffer: add the next caller-supplied offset to the stored one
GLuint const bindingPoint = p.getBufferBinding(set, binding);
GLintptr const offset = arg.offset + offsets[dynamicOffsetIndex++];
assert_invariant(arg.id || (!arg.size && !offset));
gl.bindBufferRange(arg.target, bindingPoint, arg.id, offset, arg.size);
} else if constexpr (std::is_same_v<T, BufferGLES2>) {
// ES2 buffer: uniforms are updated from the buffer object's memory at `offset`
GLuint const bindingPoint = p.getBufferBinding(set, binding);
GLintptr offset = arg.offset;
if (arg.dynamicOffset) {
offset += offsets[dynamicOffsetIndex++];
}
if (arg.bo) {
auto buffer = static_cast<char const*>(arg.bo->gl.buffer) + offset;
p.updateUniforms(bindingPoint, arg.bo->gl.id, buffer, arg.bo->age);
}
} else if constexpr (std::is_same_v<T, Sampler>) {
GLuint const unit = p.getTextureUnit(set, binding);
if (arg.target) {
gl.bindTexture(unit, arg.target, arg.id);
gl.bindSampler(unit, arg.sampler);
if (UTILS_UNLIKELY(arg.ref)) {
updateTextureView(gl, handleAllocator, unit, arg);
}
} else {
// no texture set on this descriptor
gl.unbindTextureUnit(unit);
}
} else if constexpr (std::is_same_v<T, SamplerWithAnisotropyWorkaround>) {
GLuint const unit = p.getTextureUnit(set, binding);
if (arg.target) {
gl.bindTexture(unit, arg.target, arg.id);
gl.bindSampler(unit, arg.sampler);
if (UTILS_UNLIKELY(arg.ref)) {
updateTextureView(gl, handleAllocator, unit, arg);
}
#if defined(GL_EXT_texture_filter_anisotropic)
// Driver claims to support anisotropic filtering, but it fails when set on
// the sampler, we have to set it on the texture instead.
glTexParameterf(arg.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
std::min(gl.gets.max_anisotropy, float(arg.anisotropy)));
#endif
} else {
// no texture set on this descriptor
gl.unbindTextureUnit(unit);
}
} else if constexpr (std::is_same_v<T, SamplerGLES2>) {
// in ES2 the sampler parameters need to be set on the texture itself
GLuint const unit = p.getTextureUnit(set, binding);
if (arg.target) {
gl.bindTexture(unit, arg.target, arg.id);
SamplerParams const params = arg.params;
glTexParameteri(arg.target, GL_TEXTURE_MIN_FILTER,
(GLint)GLUtils::getTextureFilter(params.filterMin));
glTexParameteri(arg.target, GL_TEXTURE_MAG_FILTER,
(GLint)GLUtils::getTextureFilter(params.filterMag));
glTexParameteri(arg.target, GL_TEXTURE_WRAP_S,
(GLint)GLUtils::getWrapMode(params.wrapS));
glTexParameteri(arg.target, GL_TEXTURE_WRAP_T,
(GLint)GLUtils::getWrapMode(params.wrapT));
#if defined(GL_EXT_texture_filter_anisotropic)
// the requested anisotropy is clamped to the context's reported maximum
glTexParameterf(arg.target, GL_TEXTURE_MAX_ANISOTROPY_EXT,
std::min(gl.gets.max_anisotropy, arg.anisotropy));
#endif
} else {
// no texture set on this descriptor
gl.unbindTextureUnit(unit);
}
}
}, entry.desc);
});
CHECK_GL_ERROR(utils::slog.e)
}
// Checks that this descriptor set is compatible with the layout the pipeline expects.
//
// If `pipelineLayout` is the very handle this set was created with, there is nothing to
// check. Otherwise the two layouts must describe the same bindings, element by element
// (type, stage flags, binding index, flags and count); a mismatch trips an assertion.
void GLDescriptorSet::validate(HandleAllocatorGL& allocator,
        DescriptorSetLayoutHandle pipelineLayout) const {
    if (UTILS_UNLIKELY(dslh != pipelineLayout)) {
        auto* const dsl = allocator.handle_cast<GLDescriptorSetLayout const*>(dslh);
        auto* const cur = allocator.handle_cast<GLDescriptorSetLayout const*>(pipelineLayout);

        // Pass both end iterators: the three-iterator overload assumes the second range
        // is at least as long as the first and would read past the end of
        // cur->bindings when it is shorter. With both ends given, a length mismatch
        // simply compares unequal.
        UTILS_UNUSED_IN_RELEASE
        bool const pipelineLayoutMatchesDescriptorSetLayout = std::equal(
                dsl->bindings.begin(), dsl->bindings.end(),
                cur->bindings.begin(), cur->bindings.end(),
                [](DescriptorSetLayoutBinding const& lhs,
                        DescriptorSetLayoutBinding const& rhs) {
                    return lhs.type == rhs.type &&
                           lhs.stageFlags == rhs.stageFlags &&
                           lhs.binding == rhs.binding &&
                           lhs.flags == rhs.flags &&
                           lhs.count == rhs.count;
                });

        assert_invariant(pipelineLayoutMatchesDescriptorSetLayout);
    }
}
} // namespace filament::backend

View File

@@ -1,175 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSET_H
#define TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSET_H
#include "DriverBase.h"
#include "gl_headers.h"
#include <private/backend/HandleAllocator.h>
#include <backend/DriverEnums.h>
#include <backend/Handle.h>
#include <utils/bitset.h>
#include <utils/FixedCapacityVector.h>
#include <math/half.h>
#include <array>
#include <variant>
#include <stddef.h>
#include <stdint.h>
namespace filament::backend {
struct GLBufferObject;
struct GLTexture;
struct GLTextureRef;
struct GLDescriptorSetLayout;
class OpenGLProgram;
class OpenGLContext;
class OpenGLDriver;
struct GLDescriptorSet : public HwDescriptorSet {
using HwDescriptorSet::HwDescriptorSet;
GLDescriptorSet(OpenGLContext& gl, DescriptorSetLayoutHandle dslh,
GLDescriptorSetLayout const* layout) noexcept;
// update a buffer descriptor in the set
void update(OpenGLContext& gl,
descriptor_binding_t binding, GLBufferObject* bo, size_t offset, size_t size) noexcept;
// update a sampler descriptor in the set
void update(OpenGLContext& gl,
descriptor_binding_t binding, GLTexture* t, SamplerParams params) noexcept;
// conceptually bind the set to the command buffer
void bind(
OpenGLContext& gl,
HandleAllocatorGL& handleAllocator,
OpenGLProgram const& p,
descriptor_set_t set, uint32_t const* offsets, bool offsetsOnly) const noexcept;
uint32_t getDynamicBufferCount() const noexcept {
return dynamicBufferCount;
}
void validate(HandleAllocatorGL& allocator, DescriptorSetLayoutHandle pipelineLayout) const;
private:
// a Buffer Descriptor such as SSBO or UBO with static offset
struct Buffer {
// Workaround: we cannot define the following as Buffer() = default because one of our
// clients has their compiler set up where such declaration (possibly coupled with explicit)
// will be considered a deleted constructor.
Buffer() {}
explicit Buffer(GLenum target) noexcept : target(target) {}
GLenum target; // 4
GLuint id = 0; // 4
uint32_t offset = 0; // 4
uint32_t size = 0; // 4
};
// a Buffer Descriptor such as SSBO or UBO with dynamic offset
struct DynamicBuffer {
DynamicBuffer() = default;
explicit DynamicBuffer(GLenum target) noexcept : target(target) { }
GLenum target; // 4
GLuint id = 0; // 4
uint32_t offset = 0; // 4
uint32_t size = 0; // 4
};
// a UBO descriptor for ES2
struct BufferGLES2 {
BufferGLES2() = default;
explicit BufferGLES2(bool dynamicOffset) noexcept : dynamicOffset(dynamicOffset) { }
GLBufferObject const* bo = nullptr; // 8
uint32_t offset = 0; // 4
bool dynamicOffset = false; // 4
};
// A sampler descriptor
struct Sampler {
GLenum target = 0; // 4
GLuint id = 0; // 4
GLuint sampler = 0; // 4
Handle<GLTextureRef> ref; // 4
int8_t baseLevel = 0x7f; // 1
int8_t maxLevel = -1; // 1
std::array<TextureSwizzle, 4> swizzle{ // 4
TextureSwizzle::CHANNEL_0,
TextureSwizzle::CHANNEL_1,
TextureSwizzle::CHANNEL_2,
TextureSwizzle::CHANNEL_3
};
};
struct SamplerWithAnisotropyWorkaround {
GLenum target = 0; // 4
GLuint id = 0; // 4
GLuint sampler = 0; // 4
Handle<GLTextureRef> ref; // 4
math::half anisotropy = 1.0f; // 2
int8_t baseLevel = 0x7f; // 1
int8_t maxLevel = -1; // 1
std::array<TextureSwizzle, 4> swizzle{ // 4
TextureSwizzle::CHANNEL_0,
TextureSwizzle::CHANNEL_1,
TextureSwizzle::CHANNEL_2,
TextureSwizzle::CHANNEL_3
};
};
// A sampler descriptor for ES2.
// Instead of a GL sampler object name it stores the SamplerParams value and a float
// anisotropy (default 1.0) directly.
struct SamplerGLES2 {
GLenum target = 0; // 4
GLuint id = 0; // 4
SamplerParams params{}; // 4
float anisotropy = 1.0f; // 4
};
// One slot of the descriptor set: a tagged union over every descriptor flavour declared above.
// A default-constructed Descriptor holds the first alternative, i.e. a default-constructed
// Buffer.
struct Descriptor {
std::variant<
Buffer,
DynamicBuffer,
BufferGLES2,
Sampler,
SamplerWithAnisotropyWorkaround,
SamplerGLES2> desc;
};
static_assert(sizeof(Descriptor) <= 32);
template<typename T>
static void updateTextureView(OpenGLContext& gl,
HandleAllocatorGL& handleAllocator, GLuint unit, T const& desc) noexcept;
utils::FixedCapacityVector<Descriptor> descriptors; // 16
utils::bitset64 dynamicBuffers; // 8
DescriptorSetLayoutHandle dslh; // 4
uint8_t dynamicBufferCount = 0; // 1
};
static_assert(sizeof(GLDescriptorSet) <= 32);
} // namespace filament::backend
#endif //TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSET_H

View File

@@ -1,52 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSETLAYOUT_H
#define TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSETLAYOUT_H
#include "DriverBase.h"
#include <backend/DriverEnums.h>
#include <algorithm>
#include <utility>
#include <stdint.h>
namespace filament::backend {
/**
 * OpenGL backend object for a descriptor-set layout.
 *
 * Takes over a DescriptorSetLayout, keeps its bindings sorted by ascending binding index and
 * caches the largest binding index in maxDescriptorBinding.
 */
struct GLDescriptorSetLayout : public HwDescriptorSetLayout, public DescriptorSetLayout {
    using HwDescriptorSetLayout::HwDescriptorSetLayout;

    /**
     * @param layout layout description to move from; its bindings are re-ordered in place
     *               (ascending by binding index).
     */
    explicit GLDescriptorSetLayout(DescriptorSetLayout&& layout) noexcept
            : DescriptorSetLayout(std::move(layout)) {
        std::sort(bindings.begin(), bindings.end(),
                [](auto const& lhs, auto const& rhs) {
                    return lhs.binding < rhs.binding;
                });

        // Once sorted, the last element holds the largest binding index, so no extra scan is
        // needed. The emptiness check matters: with no bindings there is no last element, and
        // dereferencing the end iterator would be undefined behavior.
        if (bindings.cbegin() != bindings.cend()) {
            maxDescriptorBinding = (bindings.cend() - 1)->binding;
        }
    }

    // Largest binding index present in `bindings`; remains 0 when there are no bindings.
    uint8_t maxDescriptorBinding = 0;
};
} // namespace filament::backend
#endif //TNT_FILAMENT_BACKEND_OPENGL_GLDESCRIPTORSETLAYOUT_H

View File

@@ -21,32 +21,12 @@
#include "gl_headers.h"
#include <backend/Handle.h>
#include <backend/DriverEnums.h>
#include <backend/platforms/OpenGLPlatform.h>
#include <array>
#include <stdint.h>
namespace filament::backend {
// Reference-counted state attached to a texture and referred to through
// Handle<GLTextureRef>.
struct GLTextureRef {
GLTextureRef() = default;
// view reference counter (starts at 1)
uint16_t count = 1;
// Current per-view values of the texture. In GL only a single view can be active at a
// time, and these fields track that state; they are used to avoid unnecessary state changes.
// The initial range is inverted (base 127 > max -1).
int8_t baseLevel = 127;
int8_t maxLevel = -1;
// Identity swizzle by default.
std::array<TextureSwizzle, 4> swizzle{
TextureSwizzle::CHANNEL_0,
TextureSwizzle::CHANNEL_1,
TextureSwizzle::CHANNEL_2,
TextureSwizzle::CHANNEL_3
};
};
struct GLTexture : public HwTexture {
using HwTexture::HwTexture;
struct GL {
@@ -64,14 +44,8 @@ struct GLTexture : public HwTexture {
bool imported : 1;
uint8_t sidecarSamples : 4;
uint8_t reserved1 : 3;
std::array<TextureSwizzle, 4> swizzle{
TextureSwizzle::CHANNEL_0,
TextureSwizzle::CHANNEL_1,
TextureSwizzle::CHANNEL_2,
TextureSwizzle::CHANNEL_3
};
} gl;
mutable Handle<GLTextureRef> ref;
OpenGLPlatform::ExternalTexture* externalTexture = nullptr;
};

View File

@@ -552,14 +552,6 @@ void OpenGLContext::initBugs(Bugs* bugs, Extensions const& exts,
} else if (strstr(renderer, "AMD") ||
strstr(renderer, "ATI")) {
// AMD/ATI GPU
} else if (strstr(vendor, "Mesa")) {
// Seen on
// [Mesa],
// [llvmpipe (LLVM 17.0.6, 256 bits)],
// [4.5 (Core Profile) Mesa 24.0.6-1],
// [4.50]
// not known which version are affected
bugs->rebind_buffer_after_deletion = true;
} else if (strstr(renderer, "Mozilla")) {
bugs->disable_invalidate_framebuffer = true;
}
@@ -614,7 +606,7 @@ FeatureLevel OpenGLContext::resolveFeatureLevel(GLint major, GLint minor,
featureLevel = FeatureLevel::FEATURE_LEVEL_2;
if (gets.max_texture_image_units >= MAX_FRAGMENT_SAMPLER_COUNT &&
gets.max_combined_texture_image_units >=
(MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
(MAX_FRAGMENT_SAMPLER_COUNT + MAX_VERTEX_SAMPLER_COUNT)) {
featureLevel = FeatureLevel::FEATURE_LEVEL_3;
}
}
@@ -623,13 +615,15 @@ FeatureLevel OpenGLContext::resolveFeatureLevel(GLint major, GLint minor,
# ifndef IOS // IOS is guaranteed to have ES3.x
else if (UTILS_UNLIKELY(major == 2)) {
// Runtime OpenGL version is ES 2.x
// note: mandatory extensions (all supported by Mali-400 and Adreno 304)
// OES_depth_texture
// OES_depth24
// OES_packed_depth_stencil
// OES_rgb8_rgba8
// OES_standard_derivatives
// OES_texture_npot
# if defined(BACKEND_OPENGL_LEVEL_GLES30)
// mandatory extensions (all supported by Mali-400 and Adreno 304)
assert_invariant(exts.OES_depth_texture);
assert_invariant(exts.OES_depth24);
assert_invariant(exts.OES_packed_depth_stencil);
assert_invariant(exts.OES_rgb8_rgba8);
assert_invariant(exts.OES_standard_derivatives);
assert_invariant(exts.OES_texture_npot);
# endif
featureLevel = FeatureLevel::FEATURE_LEVEL_0;
}
# endif // IOS
@@ -935,19 +929,15 @@ void OpenGLContext::unbindSampler(GLuint sampler) noexcept {
}
}
void OpenGLContext::deleteBuffer(GLuint buffer, GLenum target) noexcept {
glDeleteBuffers(1, &buffer);
void OpenGLContext::deleteBuffers(GLsizei n, const GLuint* buffers, GLenum target) noexcept {
glDeleteBuffers(n, buffers);
// bindings of bound buffers are reset to 0
size_t const targetIndex = getIndexForBufferTarget(target);
auto& genericBinding = state.buffers.genericBinding[targetIndex];
if (genericBinding == buffer) {
genericBinding = 0;
}
if (UTILS_UNLIKELY(bugs.rebind_buffer_after_deletion)) {
if (genericBinding) {
glBindBuffer(target, genericBinding);
const size_t targetIndex = getIndexForBufferTarget(target);
auto& genericBuffer = state.buffers.genericBinding[targetIndex];
UTILS_NOUNROLL
for (GLsizei i = 0; i < n; ++i) {
if (genericBuffer == buffers[i]) {
genericBuffer = 0;
}
}
@@ -956,13 +946,16 @@ void OpenGLContext::deleteBuffer(GLuint buffer, GLenum target) noexcept {
(target != GL_UNIFORM_BUFFER && target != GL_TRANSFORM_FEEDBACK_BUFFER));
if (target == GL_UNIFORM_BUFFER || target == GL_TRANSFORM_FEEDBACK_BUFFER) {
auto& indexedBinding = state.buffers.targets[targetIndex];
UTILS_NOUNROLL
for (auto& entry: indexedBinding.buffers) {
if (entry.name == buffer) {
entry.name = 0;
entry.offset = 0;
entry.size = 0;
auto& indexedBuffer = state.buffers.targets[targetIndex];
UTILS_NOUNROLL // clang generates >1 KiB of code!!
for (GLsizei i = 0; i < n; ++i) {
UTILS_NOUNROLL
for (auto& buffer : indexedBuffer.buffers) {
if (buffer.name == buffers[i]) {
buffer.name = 0;
buffer.offset = 0;
buffer.size = 0;
}
}
}
}

View File

@@ -60,19 +60,10 @@ public:
struct RenderPrimitive {
static_assert(MAX_VERTEX_ATTRIBUTE_COUNT <= 16);
GLuint vao[2] = {}; // 8
GLuint vao[2] = {}; // 4
GLuint elementArray = 0; // 4
GLenum indicesType = 0; // 4
// The optional 32-bit handle to a GLVertexBuffer is necessary only if the referenced
// VertexBuffer supports buffer objects. If this is zero, then the VBO handles array is
// immutable.
Handle<HwVertexBuffer> vertexBufferWithObjects; // 4
mutable utils::bitset<uint16_t> vertexAttribArray; // 2
uint8_t reserved[2] = {}; // 2
// if this differs from vertexBufferWithObjects->bufferObjectsVersion, this VAO needs to
// be updated (see OpenGLDriver::updateVertexArrayObject())
uint8_t vertexBufferVersion = 0; // 1
@@ -85,11 +76,16 @@ public:
// See OpenGLContext::bindVertexArray()
uint8_t nameVersion = 0; // 1
// Size in bytes of indices in the index buffer (1 or 2)
uint8_t indicesShift = 0; // 1
// Size in bytes of indices in the index buffer
uint8_t indicesSize = 0; // 1
// The optional 32-bit handle to a GLVertexBuffer is necessary only if the referenced
// VertexBuffer supports buffer objects. If this is zero, then the VBO handles array is
// immutable.
Handle<HwVertexBuffer> vertexBufferWithObjects; // 4
GLenum getIndicesType() const noexcept {
return indicesType;
return indicesSize == 4 ? GL_UNSIGNED_INT : GL_UNSIGNED_SHORT;
}
} gl;
@@ -190,7 +186,7 @@ public:
inline void viewport(GLint left, GLint bottom, GLsizei width, GLsizei height) noexcept;
inline void depthRange(GLclampf near, GLclampf far) noexcept;
void deleteBuffer(GLuint buffer, GLenum target) noexcept;
void deleteBuffers(GLsizei n, const GLuint* buffers, GLenum target) noexcept;
void deleteVertexArray(GLuint vao) noexcept;
void destroyWithContext(size_t index, std::function<void(OpenGLContext&)> const& closure) noexcept;
@@ -316,15 +312,10 @@ public:
// a glFinish. So we must delay the destruction until we know the GPU is finished.
bool delay_fbo_destruction;
// Mesa sometimes clears the generic buffer binding when *another* buffer is destroyed,
// if that other buffer is bound on an *indexed* buffer binding.
bool rebind_buffer_after_deletion;
// Force feature level 0. Typically used for low end ES3 devices with significant driver
// bugs or performance issues.
bool force_feature_level0;
} bugs = {};
// state getters -- as needed.
@@ -483,6 +474,12 @@ public:
void unbindEverything() noexcept;
void synchronizeStateAndCache(size_t index) noexcept;
// Stores the (id, data, age) triple in slot `index` of mUniformBindings, replacing whatever
// was recorded there before. `index` is not range-checked.
void setEs2UniformBinding(size_t index, GLuint id, void const* data, uint16_t age) noexcept {
    auto& slot = mUniformBindings[index];
    slot = { id, data, age };
}
// Returns, by value, the (id, data, age) triple recorded in slot `index` of
// mUniformBindings. `index` is not range-checked.
auto getEs2UniformBinding(size_t index) const noexcept {
    auto const& slot = mUniformBindings[index];
    return slot;
}
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
GLuint getSamplerSlow(SamplerParams sp) const noexcept;
@@ -509,6 +506,9 @@ private:
std::vector<std::function<void(OpenGLContext&)>> mDestroyWithNormalContext;
RenderPrimitive mDefaultVAO;
std::optional<GLuint> mDefaultFbo[2];
std::array<
std::tuple<GLuint, void const*, uint16_t>,
CONFIG_UNIFORM_BINDING_COUNT> mUniformBindings = {};
mutable tsl::robin_map<SamplerParams, GLuint,
SamplerParams::Hasher, SamplerParams::EqualTo> mSamplerMap;
@@ -559,9 +559,6 @@ private:
{ bugs.delay_fbo_destruction,
"delay_fbo_destruction",
""},
{ bugs.rebind_buffer_after_deletion,
"rebind_buffer_after_deletion",
""},
{ bugs.force_feature_level0,
"force_feature_level0",
""},

File diff suppressed because it is too large Load Diff

View File

@@ -21,8 +21,6 @@
#include "OpenGLContext.h"
#include "OpenGLTimerQuery.h"
#include "GLBufferObject.h"
#include "GLDescriptorSet.h"
#include "GLDescriptorSetLayout.h"
#include "GLTexture.h"
#include "ShaderCompilerService.h"
@@ -38,7 +36,6 @@
#include "private/backend/Driver.h"
#include "private/backend/HandleAllocator.h"
#include <utils/bitset.h>
#include <utils/FixedCapacityVector.h>
#include <utils/compiler.h>
#include <utils/debug.h>
@@ -55,7 +52,6 @@
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#include <stddef.h>
@@ -127,6 +123,16 @@ public:
} gl;
};
// OpenGL backend object for a sampler group: a fixed-size list of (texture handle,
// GL sampler name) pairs, one per entry of the group.
struct GLSamplerGroup : public HwSamplerGroup {
using HwSamplerGroup::HwSamplerGroup;
// One entry of the group. A default-constructed Entry has a default (empty) texture handle
// and sampler name 0.
struct Entry {
Handle<HwTexture> th;
GLuint sampler = 0u;
};
utils::FixedCapacityVector<Entry> textureUnitEntries;
// Creates a group whose entry list is constructed with `size` elements.
explicit GLSamplerGroup(size_t size) noexcept : textureUnitEntries(size) { }
};
struct GLRenderPrimitive : public HwRenderPrimitive {
using HwRenderPrimitive::HwRenderPrimitive;
OpenGLContext::RenderPrimitive gl;
@@ -139,10 +145,6 @@ public:
using GLTimerQuery = filament::backend::GLTimerQuery;
using GLDescriptorSetLayout = filament::backend::GLDescriptorSetLayout;
using GLDescriptorSet = filament::backend::GLDescriptorSet;
struct GLStream : public HwStream {
using HwStream::HwStream;
struct Info {
@@ -315,6 +317,10 @@ private:
void resolvePass(ResolveAction action, GLRenderTarget const* rt,
TargetBufferFlags discardFlags) noexcept;
// Read-only access to mSamplerBindings, the per-binding-point array of GLSamplerGroup
// pointers. Returned by reference; no copy is made.
const std::array<GLSamplerGroup*, Program::SAMPLER_BINDING_COUNT>& getSamplerBindings() const {
return mSamplerBindings;
}
using AttachmentArray = std::array<GLenum, MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT + 2>;
static GLsizei getAttachments(AttachmentArray& attachments, TargetBufferFlags buffers,
bool isDefaultFramebuffer) noexcept;
@@ -327,16 +333,8 @@ private:
GLboolean mRenderPassStencilWrite{};
GLRenderPrimitive const* mBoundRenderPrimitive = nullptr;
OpenGLProgram* mBoundProgram = nullptr;
bool mValidProgram = false;
utils::bitset8 mInvalidDescriptorSetBindings;
utils::bitset8 mInvalidDescriptorSetBindingOffsets;
void updateDescriptors(utils::bitset8 invalidDescriptorSets) noexcept;
struct {
backend::DescriptorSetHandle dsh;
std::array<uint32_t, CONFIG_UNIFORM_BINDING_COUNT> offsets;
} mBoundDescriptorSets[MAX_DESCRIPTOR_SET_COUNT];
void clearWithRasterPipe(TargetBufferFlags clearFlags,
math::float4 const& linearColor, GLfloat depth, GLint stencil) noexcept;
@@ -348,6 +346,9 @@ private:
// ES2 only. Uniform buffer emulation binding points
GLuint mLastAssignedEmulatedUboId = 0;
// sampler buffer binding points (nullptr if not used)
std::array<GLSamplerGroup*, Program::SAMPLER_BINDING_COUNT> mSamplerBindings = {}; // 4 pointers
// this must be accessed from the driver thread only
std::vector<GLTexture*> mTexturesWithStreamsAttached;
@@ -358,6 +359,8 @@ private:
void detachStream(GLTexture* t) noexcept;
void replaceStream(GLTexture* t, GLStream* stream) noexcept;
void updateTextureLodRange(GLTexture* texture, int8_t targetLevel) noexcept;
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
// tasks executed on the main thread after the fence signaled
void whenGpuCommandsComplete(const std::function<void()>& fn) noexcept;
@@ -381,7 +384,6 @@ private:
bool mRec709OutputColorspace = false;
PushConstantBundle* mCurrentPushConstants = nullptr;
PipelineLayout::SetLayout mCurrentSetLayout;
};
// ------------------------------------------------------------------------------------------------

View File

@@ -17,7 +17,6 @@
#include "OpenGLProgram.h"
#include "GLUtils.h"
#include "GLTexture.h"
#include "OpenGLDriver.h"
#include "ShaderCompilerService.h"
@@ -25,7 +24,6 @@
#include <backend/Program.h>
#include <backend/Handle.h>
#include <utils/BitmaskEnum.h>
#include <utils/compiler.h>
#include <utils/debug.h>
#include <utils/FixedCapacityVector.h>
@@ -34,10 +32,9 @@
#include <algorithm>
#include <array>
#include <algorithm>
#include <new>
#include <string_view>
#include <utility>
#include <new>
#include <stddef.h>
#include <stdint.h>
@@ -49,8 +46,9 @@ using namespace utils;
using namespace backend;
struct OpenGLProgram::LazyInitializationData {
Program::DescriptorSetInfo descriptorBindings;
Program::BindingUniformsInfo bindingUniformInfo;
Program::UniformBlockInfo uniformBlockInfo;
Program::SamplerGroupInfo samplerGroupInfo;
std::array<Program::UniformInfo, Program::UNIFORM_BINDING_COUNT> bindingUniformInfo;
utils::FixedCapacityVector<Program::PushConstant> vertexPushConstants;
utils::FixedCapacityVector<Program::PushConstant> fragmentPushConstants;
};
@@ -59,14 +57,16 @@ struct OpenGLProgram::LazyInitializationData {
OpenGLProgram::OpenGLProgram() noexcept = default;
OpenGLProgram::OpenGLProgram(OpenGLDriver& gld, Program&& program) noexcept
: HwProgram(std::move(program.getName())), mRec709Location(-1) {
: HwProgram(std::move(program.getName())) {
auto* const lazyInitializationData = new(std::nothrow) LazyInitializationData();
lazyInitializationData->samplerGroupInfo = std::move(program.getSamplerGroupInfo());
if (UTILS_UNLIKELY(gld.getContext().isES2())) {
lazyInitializationData->bindingUniformInfo = std::move(program.getBindingUniformInfo());
} else {
lazyInitializationData->uniformBlockInfo = std::move(program.getUniformBlockBindings());
}
lazyInitializationData->vertexPushConstants = std::move(program.getPushConstants(ShaderStage::VERTEX));
lazyInitializationData->fragmentPushConstants = std::move(program.getPushConstants(ShaderStage::FRAGMENT));
lazyInitializationData->descriptorBindings = std::move(program.getDescriptorBindings());
ShaderCompilerService& compiler = gld.getShaderCompilerService();
mToken = compiler.createProgram(name, std::move(program));
@@ -124,87 +124,36 @@ void OpenGLProgram::initializeProgramState(OpenGLContext& context, GLuint progra
SYSTRACE_CALL();
// from the pipeline layout we compute a mapping from {set, binding} to {binding}
// for both buffers and textures
for (auto&& entry: lazyInitializationData.descriptorBindings) {
std::sort(entry.begin(), entry.end(),
[](Program::Descriptor const& lhs, Program::Descriptor const& rhs) {
return lhs.binding < rhs.binding;
});
}
GLuint tmu = 0;
GLuint binding = 0;
// needed for samplers
context.useProgram(program);
UTILS_NOUNROLL
for (backend::descriptor_set_t set = 0; set < MAX_DESCRIPTOR_SET_COUNT; set++) {
for (Program::Descriptor const& entry: lazyInitializationData.descriptorBindings[set]) {
switch (entry.type) {
case DescriptorType::UNIFORM_BUFFER:
case DescriptorType::SHADER_STORAGE_BUFFER: {
if (!entry.name.empty()) {
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
if (UTILS_LIKELY(!context.isES2())) {
GLuint const index = glGetUniformBlockIndex(program,
entry.name.c_str());
if (index != GL_INVALID_INDEX) {
// this can fail if the program doesn't use this descriptor
glUniformBlockBinding(program, index, binding);
mBindingMap.insert(set, entry.binding,
{ binding, entry.type });
++binding;
}
} else
#endif
{
auto pos = std::find_if(lazyInitializationData.bindingUniformInfo.begin(),
lazyInitializationData.bindingUniformInfo.end(),
[&name = entry.name](const auto& item) {
return std::get<1>(item) == name;
});
if (pos != lazyInitializationData.bindingUniformInfo.end()) {
binding = std::get<0>(*pos);
mBindingMap.insert(set, entry.binding, { binding, entry.type });
}
}
}
break;
if (!context.isES2()) {
// Note: This is only needed, because the layout(binding=) syntax is not permitted in glsl
// (ES3.0 and GL4.1). The backend needs a way to associate a uniform block to a binding point.
UTILS_NOUNROLL
for (GLuint binding = 0, n = lazyInitializationData.uniformBlockInfo.size();
binding < n; binding++) {
auto const& name = lazyInitializationData.uniformBlockInfo[binding];
if (!name.empty()) {
GLuint const index = glGetUniformBlockIndex(program, name.c_str());
if (index != GL_INVALID_INDEX) {
glUniformBlockBinding(program, index, binding);
}
case DescriptorType::SAMPLER:
case DescriptorType::SAMPLER_EXTERNAL: {
if (!entry.name.empty()) {
GLint const loc = glGetUniformLocation(program, entry.name.c_str());
if (loc >= 0) {
// this can fail if the program doesn't use this descriptor
mBindingMap.insert(set, entry.binding, { tmu, entry.type });
glUniform1i(loc, GLint(tmu));
++tmu;
}
}
break;
}
case DescriptorType::INPUT_ATTACHMENT:
break;
CHECK_GL_ERROR(utils::slog.e)
}
CHECK_GL_ERROR(utils::slog.e)
}
}
if (context.isES2()) {
} else
#endif
{
// ES2 initialization of (fake) UBOs
UniformsRecord* const uniformsRecords = new(std::nothrow) UniformsRecord[Program::UNIFORM_BINDING_COUNT];
UTILS_NOUNROLL
for (auto&& [index, name, uniforms] : lazyInitializationData.bindingUniformInfo) {
uniformsRecords[index].locations.reserve(uniforms.size());
uniformsRecords[index].locations.resize(uniforms.size());
for (GLuint binding = 0, n = Program::UNIFORM_BINDING_COUNT; binding < n; binding++) {
Program::UniformInfo& uniforms = lazyInitializationData.bindingUniformInfo[binding];
uniformsRecords[binding].locations.reserve(uniforms.size());
uniformsRecords[binding].locations.resize(uniforms.size());
for (size_t j = 0, c = uniforms.size(); j < c; j++) {
GLint const loc = glGetUniformLocation(program, uniforms[j].name.c_str());
uniformsRecords[index].locations[j] = loc;
if (UTILS_UNLIKELY(index == 0)) {
uniformsRecords[binding].locations[j] = loc;
if (UTILS_UNLIKELY(binding == 0)) {
// This is a bit of a gross hack here, we stash the location of
// "frameUniforms.rec709", which obviously the backend shouldn't know about,
// which is used for emulating the "rec709" colorspace in the shader.
@@ -216,11 +165,51 @@ void OpenGLProgram::initializeProgramState(OpenGLContext& context, GLuint progra
}
}
}
uniformsRecords[index].uniforms = std::move(uniforms);
uniformsRecords[binding].uniforms = std::move(uniforms);
}
mUniformsRecords = uniformsRecords;
}
uint8_t usedBindingCount = 0;
uint8_t tmu = 0;
UTILS_NOUNROLL
for (size_t i = 0, c = lazyInitializationData.samplerGroupInfo.size(); i < c; i++) {
auto const& samplers = lazyInitializationData.samplerGroupInfo[i].samplers;
if (samplers.empty()) {
// this binding point doesn't have any samplers, skip it.
continue;
}
// keep this in the loop, so we skip it in the rare case a program doesn't have
// sampler. The context cache will prevent repeated calls to GL.
context.useProgram(program);
bool atLeastOneSamplerUsed = false;
UTILS_NOUNROLL
for (const Program::Sampler& sampler: samplers) {
// find its location and associate a TMU to it
GLint const loc = glGetUniformLocation(program, sampler.name.c_str());
if (loc >= 0) {
// this can fail if the program doesn't use this sampler
glUniform1i(loc, tmu);
atLeastOneSamplerUsed = true;
}
tmu++;
}
// if this program doesn't use any sampler from this HwSamplerGroup, just cancel the
// whole group.
if (atLeastOneSamplerUsed) {
// Cache the sampler uniform locations for each interface block
mUsedSamplerBindingPoints[usedBindingCount] = i;
usedBindingCount++;
} else {
tmu -= samplers.size();
}
}
mUsedBindingsCount = usedBindingCount;
auto& vertexConstants = lazyInitializationData.vertexPushConstants;
auto& fragmentConstants = lazyInitializationData.fragmentPushConstants;
@@ -237,8 +226,41 @@ void OpenGLProgram::initializeProgramState(OpenGLContext& context, GLuint progra
}
}
void OpenGLProgram::updateUniforms(
uint32_t index, GLuint id, void const* buffer, uint16_t age) const noexcept {
// Binds the textures (and, outside ES2, the sampler objects) of every sampler group this
// program uses.
//
// The used binding points are walked in the order stored in mUsedSamplerBindingPoints. Each
// entry of the group bound at that point consumes one texture unit, whether or not it has a
// texture, so texture-unit numbers run consecutively across groups.
void OpenGLProgram::updateSamplers(OpenGLDriver* const gld) const noexcept {
    using GLTexture = OpenGLDriver::GLTexture;

#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
    bool const es2 = gld->getContext().isES2();
#endif

    // grab references once, outside the loops
    auto const& UTILS_RESTRICT boundGroups = gld->getSamplerBindings();
    auto const& UTILS_RESTRICT usedPoints = mUsedSamplerBindingPoints;

    uint8_t tmu = 0;
    uint8_t const usedCount = mUsedBindingsCount;
    for (uint8_t point = 0; point < usedCount; point++) {
        size_t const binding = usedPoints[point];
        assert_invariant(binding < Program::SAMPLER_BINDING_COUNT);

        auto const* const group = boundGroups[binding];
        assert_invariant(group);
        if (!group) {
            // should never happen, this would be a user error.
            continue;
        }

        uint8_t const entryCount = group->textureUnitEntries.size();
        for (uint8_t e = 0; e < entryCount; ++e, ++tmu) {
            auto const& entry = group->textureUnitEntries[e];
            Handle<HwTexture> th = entry.th;
            if (!th) {
                // the program may not use every sampler of the group; the unit is still consumed
                continue;
            }
            GLTexture const* const t = gld->handle_cast<GLTexture const*>(th);
            gld->bindTexture(tmu, t);
#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
            if (UTILS_LIKELY(!es2)) {
                gld->bindSampler(tmu, entry.sampler);
            }
#endif
        }
    }
    CHECK_GL_ERROR(utils::slog.e)
}
void OpenGLProgram::updateUniforms(uint32_t index, GLuint id, void const* buffer, uint16_t age) noexcept {
assert_invariant(mUniformsRecords);
assert_invariant(buffer);
@@ -255,9 +277,7 @@ void OpenGLProgram::updateUniforms(
for (size_t i = 0, c = records.uniforms.size(); i < c; i++) {
Program::Uniform const& u = records.uniforms[i];
GLint const loc = records.locations[i];
// mRec709Location is special, it is handled by setRec709ColorSpace() and the corresponding
// entry in `buffer` is typically not initialized, so we skip it.
if (loc < 0 || loc == mRec709Location) {
if (loc < 0) {
continue;
}
// u.offset is in 'uint32_t' units

View File

@@ -19,20 +19,17 @@
#include "DriverBase.h"
#include "BindingMap.h"
#include "OpenGLContext.h"
#include "ShaderCompilerService.h"
#include <private/backend/Driver.h>
#include <backend/DriverEnums.h>
#include <backend/Program.h>
#include <utils/bitset.h>
#include <utils/compiler.h>
#include <utils/FixedCapacityVector.h>
#include <utils/Slice.h>
#include <array>
#include <limits>
#include <stddef.h>
@@ -72,25 +69,32 @@ public:
}
context.useProgram(gl.program);
if (UTILS_UNLIKELY(mUsedBindingsCount)) {
// We rely on GL state tracking to avoid unnecessary glBindTexture / glBindSampler
// calls.
// we need to do this if:
// - the content of mSamplerBindings has changed
// - the content of any bound sampler buffer has changed
// ... since last time we used this program
// Turns out the former might be relatively cheap to check, the latter requires
// a bit less. Compared to what updateSamplers() actually does, which is
// pretty little, I'm not sure if we'll get ahead.
updateSamplers(gld);
}
return true;
}
// Looks up the {set, binding} pair in mBindingMap and returns the mapped GL binding index.
GLuint getBufferBinding(descriptor_set_t set, descriptor_binding_t binding) const noexcept {
return mBindingMap.get(set, binding);
}
// Looks up the {set, binding} pair in mBindingMap and returns the mapped value, interpreted
// by callers as a texture unit. The lookup is the same one getBufferBinding() performs.
GLuint getTextureUnit(descriptor_set_t set, descriptor_binding_t binding) const noexcept {
return mBindingMap.get(set, binding);
}
// Returns the 64-bit set that mBindingMap reports as the active descriptors of `set`.
utils::bitset64 getActiveDescriptors(descriptor_set_t set) const noexcept {
return mBindingMap.getActiveDescriptors(set);
}
// For ES2 only
void updateUniforms(uint32_t index, GLuint id, void const* buffer, uint16_t age) const noexcept;
void updateUniforms(uint32_t index, GLuint id, void const* buffer, uint16_t age) noexcept;
void setRec709ColorSpace(bool rec709) const noexcept;
struct {
GLuint program = 0;
} gl; // 4 bytes
PushConstantBundle getPushConstants() {
auto fragBegin = mPushConstants.begin() + mPushConstantFragmentStageOffset;
return {
@@ -108,15 +112,22 @@ private:
void initializeProgramState(OpenGLContext& context, GLuint program,
LazyInitializationData& lazyInitializationData) noexcept;
BindingMap mBindingMap; // 8 bytes + out-of-line 256 bytes
void updateSamplers(OpenGLDriver* gld) const noexcept;
// number of bindings actually used by this program
std::array<uint8_t, Program::SAMPLER_BINDING_COUNT> mUsedSamplerBindingPoints; // 4 bytes
ShaderCompilerService::program_token_t mToken{}; // 16 bytes
// Note that this can be replaced with a raw pointer and an uint8_t (for size) to reduce the
// size of the container to 9 bytes if there is a need in the future.
utils::FixedCapacityVector<std::pair<GLint, ConstantType>> mPushConstants;// 16 bytes
uint8_t mUsedBindingsCount = 0u; // 1 byte
UTILS_UNUSED uint8_t padding[2] = {}; // 2 byte
// Push constant array offset for fragment stage constants.
uint8_t mPushConstantFragmentStageOffset = 0u; // 1 byte
// only needed for ES2
GLint mRec709Location = -1; // 4 bytes
using LocationInfo = utils::FixedCapacityVector<GLint>;
struct UniformsRecord {
Program::UniformInfo uniforms;
@@ -124,20 +135,15 @@ private:
mutable GLuint id = 0;
mutable uint16_t age = std::numeric_limits<uint16_t>::max();
};
UniformsRecord const* mUniformsRecords = nullptr;
GLint mRec709Location : 24; // 4 bytes
UniformsRecord const* mUniformsRecords = nullptr; // 8 bytes
// Push constant array offset for fragment stage constants.
GLint mPushConstantFragmentStageOffset : 8; // 1 byte
public:
struct {
GLuint program = 0;
} gl; // 4 bytes
// Note that this can be replaced with a raw pointer and an uint8_t (for size) to reduce the
// size of the container to 9 bytes if there is a need in the future.
utils::FixedCapacityVector<std::pair<GLint, ConstantType>> mPushConstants;// 16 bytes
};
// if OpenGLProgram is larger than 96 bytes, it'll fall in a larger Handle bucket.
static_assert(sizeof(OpenGLProgram) <= 96); // currently 96 bytes
// if OpenGLProgram is larger than 64 bytes, it'll fall in a larger Handle bucket.
static_assert(sizeof(OpenGLProgram) <= 64); // currently 64 bytes
} // namespace filament::backend

View File

@@ -155,6 +155,10 @@ Driver* PlatformEGL::createDriver(void* sharedContext, const Platform::DriverCon
ext.egl.KHR_no_config_context = extensions.has("EGL_KHR_no_config_context");
ext.egl.KHR_surfaceless_context = extensions.has("EGL_KHR_surfaceless_context");
ext.egl.EXT_protected_content = extensions.has("EGL_EXT_protected_content");
if (ext.egl.KHR_create_context) {
// KHR_create_context implies KHR_surfaceless_context for ES3.x contexts
ext.egl.KHR_surfaceless_context = true;
}
eglCreateSyncKHR = (PFNEGLCREATESYNCKHRPROC) eglGetProcAddress("eglCreateSyncKHR");
eglDestroySyncKHR = (PFNEGLDESTROYSYNCKHRPROC) eglGetProcAddress("eglDestroySyncKHR");
@@ -553,8 +557,6 @@ void PlatformEGL::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
if (swapChain) {
SwapChainEGL const* const sc = static_cast<SwapChainEGL const*>(swapChain);
if (sc->sur != EGL_NO_SURFACE) {
// - if EGL_KHR_surfaceless_context is supported, mEGLDummySurface is EGL_NO_SURFACE.
// - this is actually a bit too aggressive, but it is a rare operation.
egl.makeCurrent(mEGLDummySurface, mEGLDummySurface);
eglDestroySurface(mEGLDisplay, sc->sur);
delete sc;

View File

@@ -25,14 +25,12 @@
#include "ExternalStreamManagerAndroid.h"
#include <android/api-level.h>
#include <android/native_window.h>
#include <android/hardware_buffer.h>
#include <utils/android/PerformanceHintManager.h>
#include <utils/compiler.h>
#include <utils/ostream.h>
#include <utils/Panic.h>
#include <utils/Log.h>
#include <EGL/egl.h>
@@ -44,9 +42,7 @@
#include <chrono>
#include <new>
#include <string_view>
#include <dlfcn.h>
#include <unistd.h>
#include <stddef.h>
@@ -84,6 +80,8 @@ UTILS_PRIVATE PFNEGLGETFRAMETIMESTAMPSANDROIDPROC eglGetFrameTimestampsANDROID =
}
using namespace glext;
using EGLStream = Platform::Stream;
// ---------------------------------------------------------------------------------------------
PlatformEGLAndroid::InitializeJvmForPerformanceManagerIfNeeded::InitializeJvmForPerformanceManagerIfNeeded() {
@@ -103,73 +101,38 @@ PlatformEGLAndroid::PlatformEGLAndroid() noexcept
mExternalStreamManager(ExternalStreamManagerAndroid::create()),
mInitializeJvmForPerformanceManagerIfNeeded(),
mPerformanceHintManager() {
mOSVersion = android_get_device_api_level();
if (mOSVersion < 0) {
mOSVersion = __ANDROID_API_FUTURE__;
char scratch[PROP_VALUE_MAX + 1];
int length = __system_property_get("ro.build.version.release", scratch);
int const androidVersion = length >= 0 ? atoi(scratch) : 1;
if (!androidVersion) {
mOSVersion = 1000; // if androidVersion is 0, it means "future"
} else {
length = __system_property_get("ro.build.version.sdk", scratch);
mOSVersion = length >= 0 ? atoi(scratch) : 1;
}
mNativeWindowLib = dlopen("libnativewindow.so", RTLD_LOCAL | RTLD_NOW);
if (mNativeWindowLib) {
ANativeWindow_getBuffersDefaultDataSpace =
(int32_t(*)(ANativeWindow*))dlsym(mNativeWindowLib,
"ANativeWindow_getBuffersDefaultDataSpace");
}
// This disables an ANGLE optimization on ARM, which turns out to be more costly for us
// see b/229017581
// We need to do this before we create the GL context.
// An alternative solution is use a system property:
// __system_property_set(
// "debug.angle.feature_overrides_disabled",
// "preferSubmitAtFBOBoundary");
// but that would outlive this process, so the environment variable is better.
// We also make sure to not update the variable if it already exists.
// There is no harm setting this if we're not on ANGLE or ARM.
setenv("ANGLE_FEATURE_OVERRIDES_DISABLED", "preferSubmitAtFBOBoundary", false);
}
// Closes the library handle stored in mNativeWindowLib (obtained with dlopen), if there is one.
PlatformEGLAndroid::~PlatformEGLAndroid() noexcept {
if (mNativeWindowLib) {
dlclose(mNativeWindowLib);
}
}
PlatformEGLAndroid::~PlatformEGLAndroid() noexcept = default;
// Destroys the external stream manager first, then runs the base-class PlatformEGL::terminate().
void PlatformEGLAndroid::terminate() noexcept {
ExternalStreamManagerAndroid::destroy(&mExternalStreamManager);
PlatformEGL::terminate();
}
static constexpr const std::string_view kNativeWindowInvalidMsg =
"ANativeWindow is invalid. It probably has been destroyed. EGL surface = ";
// Makes the given swap chains current through PlatformEGL::makeCurrent().
//
// When mAssertNativeWindowIsValid is set, the ANativeWindow backing the draw swap chain is
// first checked for validity, and a postcondition failure is raised (with the EGL surface in
// the message) if the window has been destroyed. Swap chains without a native window are
// not checked.
//
// Returns whatever PlatformEGL::makeCurrent() returns.
bool PlatformEGLAndroid::makeCurrent(ContextType type,
        SwapChain* drawSwapChain,
        SwapChain* readSwapChain) noexcept {

    // fast & safe path
    if (UTILS_LIKELY(!mAssertNativeWindowIsValid)) {
        return PlatformEGL::makeCurrent(type, drawSwapChain, readSwapChain);
    }

    SwapChainEGL const* const dsc = static_cast<SwapChainEGL const*>(drawSwapChain);

    // nativeWindow can be nullptr if we're using a pbuffer surface; there is nothing to
    // validate in that case. The check guards BOTH validation paths below, because the
    // fallback path would otherwise dereference a null window.
    if (UTILS_LIKELY(dsc->nativeWindow)) {
        if (ANativeWindow_getBuffersDefaultDataSpace) {
            // this is a proxy for is_valid()
            auto result = ANativeWindow_getBuffersDefaultDataSpace(dsc->nativeWindow);
            FILAMENT_CHECK_POSTCONDITION(result >= 0) << kNativeWindowInvalidMsg << dsc->sur;
        } else {
            // If we don't have ANativeWindow_getBuffersDefaultDataSpace, we revert to using the
            // private query() call.

            // Shadow version of the real ANativeWindow, so we can access the query() hook.
            // Query has existed since forever, probably Android 1.0.
            struct NativeWindow {
                // is valid query enum value
                enum { IS_VALID = 17 };
                uint64_t pad[18];
                int (* query)(ANativeWindow const*, int, int*);
            } const* pWindow = reinterpret_cast<NativeWindow const*>(dsc->nativeWindow);

            int isValid = 0;
            if (UTILS_LIKELY(pWindow->query)) { // just in case it's nullptr
                int const err = pWindow->query(dsc->nativeWindow, NativeWindow::IS_VALID, &isValid);
                if (UTILS_LIKELY(err >= 0)) { // in case the IS_VALID enum is not recognized
                    // query call succeeded
                    FILAMENT_CHECK_POSTCONDITION(isValid) << kNativeWindowInvalidMsg << dsc->sur;
                }
            }
        }
    }

    return PlatformEGL::makeCurrent(type, drawSwapChain, readSwapChain);
}
void PlatformEGLAndroid::beginFrame(
int64_t monotonic_clock_ns,
int64_t refreshIntervalNs,
@@ -226,8 +189,6 @@ Driver* PlatformEGLAndroid::createDriver(void* sharedContext,
"eglGetFrameTimestampsANDROID");
}
mAssertNativeWindowIsValid = driverConfig.assertNativeWindowIsValid;
return driver;
}

View File

@@ -1,149 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <backend/platforms/PlatformOSMesa.h>
#include <utils/Log.h>
#include <utils/Panic.h>
#include <dlfcn.h>
#include <memory>
namespace filament::backend {
using namespace backend;
namespace {
using BackingType = GLfloat;
#define BACKING_GL_TYPE GL_FLOAT
// Headless swap chain for the OSMesa backend: a width x height image with 4 channels of
// BackingType each, stored in a heap buffer that OSMesa renders into.
struct OSMesaSwapchain {
    OSMesaSwapchain(uint32_t width, uint32_t height)
        : width(width),
          height(height),
          // sizeof() comes first so the whole product is evaluated in size_t; multiplying the
          // two uint32_t dimensions first could wrap around for large surfaces and allocate a
          // buffer that is too small.
          buffer(new uint8_t[sizeof(BackingType) * 4 * width * height]) {}

    uint32_t width = 0;
    uint32_t height = 0;
    // Raw pixel storage (contents are not initialized on allocation).
    std::unique_ptr<uint8_t[]> buffer;
};
// Loads libOSMesa at runtime and resolves the handful of OSMesa entry points used by
// PlatformOSMesa. The library stays loaded for the lifetime of this object and is closed in the
// destructor, so instances are non-copyable.
struct OSMesaAPI {
private:
    using CreateContextFunc = OSMesaContext (*)(GLenum format, OSMesaContext);
    using DestroyContextFunc = GLboolean (*)(OSMesaContext);
    using MakeCurrentFunc = GLboolean (*)(OSMesaContext ctx, void* buffer, GLenum type,
            GLsizei width, GLsizei height);
    using GetProcAddressFunc = OSMESAproc (*)(const char* funcName);

public:
    CreateContextFunc OSMesaCreateContext = nullptr;
    DestroyContextFunc OSMesaDestroyContext = nullptr;
    MakeCurrentFunc OSMesaMakeCurrent = nullptr;
    GetProcAddressFunc OSMesaGetProcAddress = nullptr;

    OSMesaAPI() {
        // Try the known spellings of the library name; the first one that loads wins.
        constexpr char const* libraryNames[] = {"libOSMesa.so", "libosmesa.so"};
        for (char const* libName: libraryNames) {
            mLib = dlopen(libName, RTLD_GLOBAL | RTLD_NOW);
            if (mLib) {
                break;
            }
        }
        FILAMENT_CHECK_PRECONDITION(mLib)
                << "Unable to dlopen libOSMesa to create a software GL context";

        // Every other entry point is resolved through OSMesaGetProcAddress, so it must be
        // validated before it is called.
        OSMesaGetProcAddress =
                reinterpret_cast<GetProcAddressFunc>(dlsym(mLib, "OSMesaGetProcAddress"));
        FILAMENT_CHECK_PRECONDITION(OSMesaGetProcAddress != nullptr)
                << "Unable to find OSMesaGetProcAddress in libOSMesa";

        OSMesaCreateContext =
                reinterpret_cast<CreateContextFunc>(OSMesaGetProcAddress("OSMesaCreateContext"));
        OSMesaDestroyContext =
                reinterpret_cast<DestroyContextFunc>(OSMesaGetProcAddress("OSMesaDestroyContext"));
        OSMesaMakeCurrent =
                reinterpret_cast<MakeCurrentFunc>(OSMesaGetProcAddress("OSMesaMakeCurrent"));
        FILAMENT_CHECK_PRECONDITION(OSMesaCreateContext != nullptr &&
                OSMesaDestroyContext != nullptr && OSMesaMakeCurrent != nullptr)
                << "Unable to resolve the OSMesa entry points in libOSMesa";
    }

    // Copying would close the library handle twice.
    OSMesaAPI(OSMesaAPI const&) = delete;
    OSMesaAPI& operator=(OSMesaAPI const&) = delete;

    ~OSMesaAPI() {
        if (mLib) {
            dlclose(mLib);
        }
    }

private:
    void* mLib = nullptr;
};
}// anonymous namespace
/**
 * Creates the OSMesa software GL context and the default OpenGL driver on top of it.
 *
 * @param sharedGLContext must be nullptr; shared contexts are not supported by this platform
 * @param driverConfig    forwarded to OpenGLPlatform::createDefaultDriver()
 * @return the driver returned by OpenGLPlatform::createDefaultDriver()
 */
Driver* PlatformOSMesa::createDriver(void* const sharedGLContext,
        const DriverConfig& driverConfig) noexcept {
    // Validate the arguments before acquiring any resource.
    FILAMENT_CHECK_PRECONDITION(sharedGLContext == nullptr)
            << "shared GL context is not supported with PlatformOSMesa";

    OSMesaAPI* api = new OSMesaAPI();
    mOsMesaApi = api;

    mContext = api->OSMesaCreateContext(GL_RGBA, nullptr);
    FILAMENT_CHECK_POSTCONDITION(mContext != nullptr) << "OSMesaCreateContext failed!";

    // We need to do a no-op makecurrent here so that the context will be in a correct state before
    // any GL calls.
    auto chain = createSwapChain(1, 1, 0);
    makeCurrent(ContextType::UNPROTECTED, chain, nullptr);
    destroySwapChain(chain);

    int result = bluegl::bind();
    FILAMENT_CHECK_POSTCONDITION(!result) << "Unable to load OpenGL entry points.";
    return OpenGLPlatform::createDefaultDriver(this, sharedGLContext, driverConfig);
}
void PlatformOSMesa::terminate() noexcept {
OSMesaAPI* api = (OSMesaAPI*) mOsMesaApi;
api->OSMesaDestroyContext(mContext);
delete api;
mOsMesaApi = nullptr;
bluegl::unbind();
}
// Window-backed swap chains are not available on this platform: this overload always trips a
// postcondition failure. The return statement only exists to satisfy the signature.
Platform::SwapChain* PlatformOSMesa::createSwapChain(void* nativeWindow, uint64_t flags) noexcept {
    FILAMENT_CHECK_POSTCONDITION(false) << "Cannot create non-headless swapchain";
    return static_cast<SwapChain*>(nativeWindow);
}
// Creates a headless swap chain of the requested size. The returned handle is an opaque pointer
// to a heap-allocated OSMesaSwapchain and must be released with destroySwapChain().
// The flags argument is not used.
Platform::SwapChain* PlatformOSMesa::createSwapChain(uint32_t width, uint32_t height,
        uint64_t flags) noexcept {
    return reinterpret_cast<SwapChain*>(new OSMesaSwapchain(width, height));
}
// Releases a swap chain handle; the handle is treated as a pointer to an OSMesaSwapchain.
void PlatformOSMesa::destroySwapChain(Platform::SwapChain* swapChain) noexcept {
    delete reinterpret_cast<OSMesaSwapchain*>(swapChain);
}
// Binds the OSMesa context to the draw swap chain's pixel buffer. Neither the context type nor
// the read swap chain is consulted. A failure of OSMesaMakeCurrent trips a postcondition;
// otherwise the function returns true.
bool PlatformOSMesa::makeCurrent(ContextType type, SwapChain* drawSwapChain,
        SwapChain* readSwapChain) noexcept {
    auto* const osmesa = static_cast<OSMesaAPI*>(mOsMesaApi);
    auto* const target = reinterpret_cast<OSMesaSwapchain*>(drawSwapChain);

    GLboolean const bound = osmesa->OSMesaMakeCurrent(
            mContext,
            target->buffer.get(),
            BACKING_GL_TYPE,
            target->width,
            target->height);
    FILAMENT_CHECK_POSTCONDITION(bound == GL_TRUE) << "OSMesaMakeCurrent failed!";
    return true;
}
// Intentionally empty: the swapChain argument is ignored and nothing is presented.
void PlatformOSMesa::commit(Platform::SwapChain* swapChain) noexcept {
// No-op since we are not scanning out to a display.
}
} // namespace filament::backend

View File

@@ -19,8 +19,8 @@
#include <Wingdi.h>
#ifdef _MSC_VER
// this variable is checked in BlueGL.h (included from "gl_headers.h" right after this),
// and prevents duplicate definition of OpenGL apis when building this file.
// this variable is checked in BlueGL.h (included from "gl_headers.h" right after this),
// and prevents duplicate definition of OpenGL apis when building this file.
// However, GL_GLEXT_PROTOTYPES need to be defined in BlueGL.h when included from other files.
#define FILAMENT_PLATFORM_WGL
#endif
@@ -37,8 +37,9 @@
namespace {
void reportWindowsError(DWORD dwError) {
void reportLastWindowsError() {
LPSTR lpMessageBuffer = nullptr;
DWORD dwError = GetLastError();
if (dwError == 0) {
return;
@@ -79,7 +80,6 @@ Driver* PlatformWGL::createDriver(void* const sharedGLContext,
const Platform::DriverConfig& driverConfig) noexcept {
int result = 0;
int pixelFormat = 0;
DWORD dwError = 0;
mPfd = {
sizeof(PIXELFORMATDESCRIPTOR),
@@ -105,7 +105,6 @@ Driver* PlatformWGL::createDriver(void* const sharedGLContext,
mHWnd = CreateWindowA("STATIC", "dummy", 0, 0, 0, 1, 1, NULL, NULL, NULL, NULL);
HDC whdc = mWhdc = GetDC(mHWnd);
if (whdc == NULL) {
dwError = GetLastError();
utils::slog.e << "CreateWindowA() failed" << utils::io::endl;
goto error;
}
@@ -116,7 +115,6 @@ Driver* PlatformWGL::createDriver(void* const sharedGLContext,
// We need a tmp context to retrieve and call wglCreateContextAttribsARB.
tempContext = wglCreateContext(whdc);
if (!wglMakeCurrent(whdc, tempContext)) {
dwError = GetLastError();
utils::slog.e << "wglMakeCurrent() failed, whdc=" << whdc << ", tempContext=" <<
tempContext << utils::io::endl;
goto error;
@@ -138,7 +136,6 @@ Driver* PlatformWGL::createDriver(void* const sharedGLContext,
if (mContext) {
break;
}
dwError = GetLastError();
}
if (!mContext) {
@@ -151,7 +148,6 @@ Driver* PlatformWGL::createDriver(void* const sharedGLContext,
tempContext = NULL;
if (!wglMakeCurrent(whdc, mContext)) {
dwError = GetLastError();
utils::slog.e << "wglMakeCurrent() failed, whdc=" << whdc << ", mContext=" <<
mContext << utils::io::endl;
goto error;
@@ -166,7 +162,7 @@ error:
if (tempContext) {
wglDeleteContext(tempContext);
}
reportWindowsError(dwError);
reportLastWindowsError();
terminate();
return NULL;
}
@@ -209,11 +205,9 @@ Platform::SwapChain* PlatformWGL::createSwapChain(void* nativeWindow, uint64_t f
// on Windows, the nativeWindow maps to a HWND
swapChain->hWnd = (HWND) nativeWindow;
swapChain->hDc = GetDC(swapChain->hWnd);
if (!swapChain->hDc) {
DWORD dwError = GetLastError();
ASSERT_POSTCONDITION_NON_FATAL(swapChain->hDc,
"Unable to create the SwapChain (nativeWindow = %p)", nativeWindow);
reportWindowsError(dwError);
if (!ASSERT_POSTCONDITION_NON_FATAL(swapChain->hDc,
"Unable to create the SwapChain (nativeWindow = %p)", nativeWindow)) {
reportLastWindowsError();
}
// We have to match pixel formats across the HDC and HGLRC (mContext)
@@ -270,10 +264,8 @@ bool PlatformWGL::makeCurrent(ContextType type, SwapChain* drawSwapChain,
HDC hdc = wglSwapChain->hDc;
if (hdc != NULL) {
BOOL success = wglMakeCurrent(hdc, mContext);
if (!success) {
DWORD dwError = GetLastError();
ASSERT_POSTCONDITION_NON_FATAL(success, "wglMakeCurrent() failed. hdc = %p", hdc);
reportWindowsError(dwError);
if (!ASSERT_POSTCONDITION_NON_FATAL(success, "wglMakeCurrent() failed. hdc = %p", hdc)) {
reportLastWindowsError();
wglMakeCurrent(0, NULL);
}
}

View File

@@ -410,6 +410,7 @@ io::ostream& operator<<(io::ostream& out, const RasterState& rs) {
// Streams a TargetBufferInfo as "TargetBufferInfo{handle=..., baseViewIndex=..., level=...,
// layer=...}" and returns the result of the chained insertions.
io::ostream& operator<<(io::ostream& out, const TargetBufferInfo& tbi) {
return out << "TargetBufferInfo{"
<< "handle=" << tbi.handle
<< ", baseViewIndex=" << tbi.baseViewIndex
<< ", level=" << tbi.level
<< ", layer=" << tbi.layer << "}";
}

View File

@@ -1,43 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "VulkanAsyncHandles.h"
namespace filament::backend {
VulkanTimerQuery::VulkanTimerQuery(std::tuple<uint32_t, uint32_t> indices)
: mStartingQueryIndex(std::get<0>(indices)),
mStoppingQueryIndex(std::get<1>(indices)) {}
// Replaces the fence associated with this query. The assignment is done while holding
// mFenceMutex, the same mutex isCompleted() takes before reading the fence.
void VulkanTimerQuery::setFence(std::shared_ptr<VulkanCmdFence> fence) noexcept {
    std::lock_guard<utils::Mutex> const guard(mFenceMutex);
    mFence = fence;
}
// Returns true only once a fence has been attached and that fence reports VK_SUCCESS.
bool VulkanTimerQuery::isCompleted() noexcept {
    std::lock_guard<utils::Mutex> const guard(mFenceMutex);

    // QueryValue is a synchronous call and may run before beginTimerQuery has written anything
    // into the command buffer. The validation layer shipped with the Android NDK treats that as
    // an error: even with AVAILABILITY_BIT set, it seems to require that the timestamp has at
    // least been written into a processed command buffer. The fence tells us that the
    // corresponding command buffer has completed.
    if (!mFence) {
        return false;
    }
    return mFence->getStatus() == VK_SUCCESS;
}
VulkanTimerQuery::~VulkanTimerQuery() = default;
} // namespace filament::backend

View File

@@ -1,81 +0,0 @@
/*
* Copyright (C) 2024 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_VULKANASYNCHANDLES_H
#define TNT_FILAMENT_BACKEND_VULKANASYNCHANDLES_H
#include <bluevk/BlueVK.h>
#include "DriverBase.h"
#include "vulkan/memory/Resource.h"
#include <utils/Mutex.h>
#include <utils/Condition.h>
namespace filament::backend {
// Wrapper to enable use of shared_ptr for implementing shared ownership of low-level Vulkan fences.
// Holds the status of a command buffer submission as an atomic VkResult, so that it can be
// updated by the submitting code and polled by other owners.
struct VulkanCmdFence {
    // Internally we use the VK_INCOMPLETE status to mean "not yet submitted". When this fence
    // gets submitted, its status changes to VK_NOT_READY. Finally, when the GPU actually
    // finishes executing the command buffer, the status changes to VK_SUCCESS.
    VulkanCmdFence(VkResult initialStatus) : status(initialStatus) {}

    ~VulkanCmdFence() = default;

    // Publishes a new status.
    void setStatus(VkResult value) { status.store(value); }

    // Reads the current status; const so it can be queried through a const reference.
    VkResult getStatus() const { return status.load(std::memory_order_acquire); }

private:
    std::atomic<VkResult> status;
};
// Backend fence handle. It only carries a shared reference to a VulkanCmdFence, which is empty
// until something assigns it.
struct VulkanFence : public HwFence, fvkmemory::ThreadSafeResource {
VulkanFence() {}
// Shared with whoever else holds the same VulkanCmdFence.
std::shared_ptr<VulkanCmdFence> fence;
};
// Backend timer query handle: a pair of query indices (start / stop) plus the fence used to
// tell whether the query can be considered complete.
struct VulkanTimerQuery : public HwTimerQuery, fvkmemory::ThreadSafeResource {
// indices holds (starting query index, stopping query index).
explicit VulkanTimerQuery(std::tuple<uint32_t, uint32_t> indices);
~VulkanTimerQuery();
// Associates a fence with this query; guarded by mFenceMutex.
void setFence(std::shared_ptr<VulkanCmdFence> fence) noexcept;
// True once a fence has been set and its status is VK_SUCCESS; guarded by mFenceMutex.
bool isCompleted() noexcept;
// Index of the query that marks the start of the timed range.
uint32_t getStartingQueryIndex() const {
return mStartingQueryIndex;
}
// Index of the query that marks the end of the timed range.
uint32_t getStoppingQueryIndex() const {
return mStoppingQueryIndex;
}
private:
uint32_t mStartingQueryIndex;
uint32_t mStoppingQueryIndex;
// Null until setFence() is called.
std::shared_ptr<VulkanCmdFence> mFence;
// Protects mFence.
utils::Mutex mFenceMutex;
};
} // namespace filament::backend
#endif // TNT_FILAMENT_BACKEND_VULKANASYNCHANDLES_H

View File

@@ -15,7 +15,6 @@
*/
#include "VulkanBlitter.h"
#include "VulkanCommands.h"
#include "VulkanContext.h"
#include "VulkanFboCache.h"
#include "VulkanHandles.h"
@@ -34,10 +33,9 @@ namespace filament::backend {
namespace {
inline void blitFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect, VkFilter filter,
inline void blitFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect, VkFilter filter,
VulkanAttachment src, VulkanAttachment dst,
const VkOffset3D srcRect[2], const VkOffset3D dstRect[2]) {
VkCommandBuffer const cmdbuf = commands->buffer();
if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) {
FVK_LOGD << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level
<< " layout=" << src.getLayout()
@@ -51,8 +49,8 @@ inline void blitFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect, V
VulkanLayout oldSrcLayout = src.getLayout();
VulkanLayout oldDstLayout = dst.getLayout();
src.texture->transitionLayout(commands, srcRange, VulkanLayout::TRANSFER_SRC);
dst.texture->transitionLayout(commands, dstRange, VulkanLayout::TRANSFER_DST);
src.texture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::TRANSFER_SRC);
dst.texture->transitionLayout(cmdbuffer, dstRange, VulkanLayout::TRANSFER_DST);
const VkImageBlit blitRegions[1] = {{
.srcSubresource = { aspect, src.level, src.layer, 1 },
@@ -60,24 +58,23 @@ inline void blitFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect, V
.dstSubresource = { aspect, dst.level, dst.layer, 1 },
.dstOffsets = { dstRect[0], dstRect[1] },
}};
vkCmdBlitImage(cmdbuf,
vkCmdBlitImage(cmdbuffer,
src.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_SRC),
dst.getImage(), imgutil::getVkLayout(VulkanLayout::TRANSFER_DST),
1, blitRegions, filter);
if (oldSrcLayout == VulkanLayout::UNDEFINED) {
oldSrcLayout = src.texture->getDefaultLayout();
oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage);
}
if (oldDstLayout == VulkanLayout::UNDEFINED) {
oldDstLayout = dst.texture->getDefaultLayout();
oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage);
}
src.texture->transitionLayout(commands, srcRange, oldSrcLayout);
dst.texture->transitionLayout(commands, dstRange, oldDstLayout);
src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout);
dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout);
}
inline void resolveFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect,
inline void resolveFast(const VkCommandBuffer cmdbuffer, VkImageAspectFlags aspect,
VulkanAttachment src, VulkanAttachment dst) {
VkCommandBuffer const cmdbuffer = commands->buffer();
if constexpr (FVK_ENABLED(FVK_DEBUG_BLITTER)) {
FVK_LOGD << "Fast blit from=" << src.texture->getVkImage() << ",level=" << (int) src.level
<< " layout=" << src.getLayout()
@@ -91,8 +88,8 @@ inline void resolveFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect
VulkanLayout oldSrcLayout = src.getLayout();
VulkanLayout oldDstLayout = dst.getLayout();
src.texture->transitionLayout(commands, srcRange, VulkanLayout::TRANSFER_SRC);
dst.texture->transitionLayout(commands, dstRange, VulkanLayout::TRANSFER_DST);
src.texture->transitionLayout(cmdbuffer, srcRange, VulkanLayout::TRANSFER_SRC);
dst.texture->transitionLayout(cmdbuffer, dstRange, VulkanLayout::TRANSFER_DST);
assert_invariant(
aspect != VK_IMAGE_ASPECT_DEPTH_BIT && "Resolve with depth is not yet supported.");
@@ -109,13 +106,13 @@ inline void resolveFast(VulkanCommandBuffer* commands, VkImageAspectFlags aspect
1, resolveRegions);
if (oldSrcLayout == VulkanLayout::UNDEFINED) {
oldSrcLayout = src.texture->getDefaultLayout();
oldSrcLayout = imgutil::getDefaultLayout(src.texture->usage);
}
if (oldDstLayout == VulkanLayout::UNDEFINED) {
oldDstLayout = dst.texture->getDefaultLayout();
oldDstLayout = imgutil::getDefaultLayout(dst.texture->usage);
}
src.texture->transitionLayout(commands, srcRange, oldSrcLayout);
dst.texture->transitionLayout(commands, dstRange, oldDstLayout);
src.texture->transitionLayout(cmdbuffer, srcRange, oldSrcLayout);
dst.texture->transitionLayout(cmdbuffer, dstRange, oldDstLayout);
}
struct BlitterUniforms {
@@ -126,7 +123,8 @@ struct BlitterUniforms {
}// anonymous namespace
VulkanBlitter::VulkanBlitter(VkPhysicalDevice physicalDevice, VulkanCommands* commands) noexcept
: mPhysicalDevice(physicalDevice), mCommands(commands) {}
: mPhysicalDevice(physicalDevice),
mCommands(commands) {}
void VulkanBlitter::resolve(VulkanAttachment dst, VulkanAttachment src) {
@@ -150,11 +148,11 @@ void VulkanBlitter::resolve(VulkanAttachment dst, VulkanAttachment src) {
}
#endif
VulkanCommandBuffer& commands = dst.texture->getIsProtected() ?
mCommands->getProtected() : mCommands->get();
VulkanCommandBuffer& commands = mCommands->get();
VkCommandBuffer const cmdbuffer = commands.buffer();
commands.acquire(src.texture);
commands.acquire(dst.texture);
resolveFast(&commands, aspect, src, dst);
resolveFast(cmdbuffer, aspect, src, dst);
}
void VulkanBlitter::blit(VkFilter filter,
@@ -176,11 +174,11 @@ void VulkanBlitter::blit(VkFilter filter,
#endif
// src and dst should have the same aspect here
VkImageAspectFlags const aspect = src.texture->getImageAspect();
VulkanCommandBuffer& commands = dst.texture->getIsProtected() ?
mCommands->getProtected() : mCommands->get();
VulkanCommandBuffer& commands = mCommands->get();
VkCommandBuffer const cmdbuffer = commands.buffer();
commands.acquire(src.texture);
commands.acquire(dst.texture);
blitFast(&commands, aspect, filter, src, dst, srcRectPair, dstRectPair);
blitFast(cmdbuffer, aspect, filter, src, dst, srcRectPair, dstRectPair);
}
void VulkanBlitter::terminate() noexcept {

View File

@@ -33,194 +33,275 @@ using namespace utils;
namespace filament::backend {
namespace {
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
using Timestamp = VulkanGroupMarkers::Timestamp;
#endif
VkCommandBuffer createCommandBuffer(VkDevice device, VkCommandPool pool) {
VkCommandBuffer cmdbuffer;
// Create the low-level command buffer.
VkCommandBufferAllocateInfo const allocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
};
// The buffer allocated here will be implicitly reset when vkBeginCommandBuffer is called.
// We don't need to deallocate since destroying the pool will free all of the buffers.
vkAllocateCommandBuffers(device, &allocateInfo, &cmdbuffer);
return cmdbuffer;
VulkanCmdFence::VulkanCmdFence(VkFence ifence)
: fence(ifence) {
// Internally we use the VK_INCOMPLETE status to mean "not yet submitted". When this fence gets
// submitted, its status changes to VK_NOT_READY. Finally, when the GPU actually finishes
// executing the command buffer, the status changes to VK_SUCCESS.
status.store(VK_INCOMPLETE);
}
} // anonymous namespace
VulkanCommandBuffer::VulkanCommandBuffer(VulkanResourceAllocator* allocator, VkDevice device,
VkCommandPool pool)
: mResourceManager(allocator),
mPipeline(VK_NULL_HANDLE) {
// Create the low-level command buffer.
const VkCommandBufferAllocateInfo allocateInfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
};
// The buffer allocated here will be implicitly reset when vkBeginCommandBuffer is called.
// We don't need to deallocate since destroying the pool will free all of the buffers.
vkAllocateCommandBuffers(device, &allocateInfo, &mBuffer);
}
CommandBufferObserver::~CommandBufferObserver() {}
static VkCommandPool createPool(VkDevice device, uint32_t queueFamilyIndex) {
VkCommandPoolCreateInfo createInfo = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT
| VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,
.queueFamilyIndex = queueFamilyIndex,
};
VkCommandPool pool;
vkCreateCommandPool(device, &createInfo, VKALLOC, &pool);
return pool;
}
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
void VulkanGroupMarkers::push(std::string const& marker, Timestamp start) noexcept {
mMarkers.push_back({marker,
start.time_since_epoch().count() > 0.0
? start
: std::chrono::high_resolution_clock::now()});
mMarkers.push_back(marker);
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
mTimestamps.push_back(start.time_since_epoch().count() > 0.0
? start
: std::chrono::high_resolution_clock::now());
#endif
}
std::pair<std::string, Timestamp> VulkanGroupMarkers::pop() noexcept {
auto ret = mMarkers.back();
auto const marker = mMarkers.back();
mMarkers.pop_back();
return ret;
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
auto const timestamp = mTimestamps.back();
mTimestamps.pop_back();
return std::make_pair(marker, timestamp);
#else
return std::make_pair(marker, Timestamp{});
#endif
}
std::pair<std::string, Timestamp> VulkanGroupMarkers::pop_bottom() noexcept {
auto ret = mMarkers.front();
auto const marker = mMarkers.front();
mMarkers.pop_front();
return ret;
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
auto const timestamp = mTimestamps.front();
mTimestamps.pop_front();
return std::make_pair(marker, timestamp);
#else
return std::make_pair(marker, Timestamp{});
#endif
}
std::pair<std::string, Timestamp> const& VulkanGroupMarkers::top() const {
std::pair<std::string, Timestamp> VulkanGroupMarkers::top() const {
assert_invariant(!empty());
return mMarkers.back();
auto const marker = mMarkers.back();
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
auto const topTimestamp = mTimestamps.front();
return std::make_pair(marker, topTimestamp);
#else
return std::make_pair(marker, Timestamp{});
#endif
}
bool VulkanGroupMarkers::empty() const noexcept {
return mMarkers.empty();
}
#endif // FVK_DEBUG_GROUP_MARKERS
VulkanCommandBuffer::VulkanCommandBuffer(VulkanContext* context, VkDevice device, VkQueue queue,
VkCommandPool pool, bool isProtected)
: mContext(context),
mMarkerCount(0),
isProtected(isProtected),
mDevice(device),
VulkanCommands::VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
VulkanContext* context, VulkanResourceAllocator* allocator)
: mDevice(device),
mQueue(queue),
mBuffer(createCommandBuffer(device, pool)),
mFenceStatus(std::make_shared<VulkanCmdFence>(VK_INCOMPLETE)) {
mPool(createPool(mDevice, queueFamilyIndex)),
mContext(context),
mStorage(CAPACITY) {
VkSemaphoreCreateInfo sci{.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
vkCreateSemaphore(mDevice, &sci, VKALLOC, &mSubmission);
for (auto& semaphore: mSubmissionSignals) {
vkCreateSemaphore(mDevice, &sci, nullptr, &semaphore);
}
VkFenceCreateInfo fenceCreateInfo{.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
vkCreateFence(device, &fenceCreateInfo, VKALLOC, &mFence);
}
VulkanCommandBuffer::~VulkanCommandBuffer() {
vkDestroySemaphore(mDevice, mSubmission, VKALLOC);
vkDestroyFence(mDevice, mFence, VKALLOC);
}
void VulkanCommandBuffer::reset() noexcept {
mMarkerCount = 0;
mResources.clear();
mWaitSemaphores.clear();
// Internally we use the VK_INCOMPLETE status to mean "not yet submitted". When this fence
// gets submitted, its status changes to VK_NOT_READY. Finally, when the GPU actually
// finishes executing the command buffer, the status changes to VK_SUCCESS.
mFenceStatus = std::make_shared<VulkanCmdFence>(VK_INCOMPLETE);
vkResetFences(mDevice, 1, &mFence);
}
void VulkanCommandBuffer::pushMarker(char const* marker) noexcept {
if (mContext->isDebugUtilsSupported()) {
VkDebugUtilsLabelEXT labelInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = marker,
.color = {0, 1, 0, 1},
};
vkCmdBeginDebugUtilsLabelEXT(mBuffer, &labelInfo);
} else if (mContext->isDebugMarkersSupported()) {
VkDebugMarkerMarkerInfoEXT markerInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
.pMarkerName = marker,
.color = {0.0f, 1.0f, 0.0f, 1.0f},
};
vkCmdDebugMarkerBeginEXT(mBuffer, &markerInfo);
for (auto& fence: mFences) {
vkCreateFence(device, &fenceCreateInfo, VKALLOC, &fence);
}
mMarkerCount++;
}
void VulkanCommandBuffer::popMarker() noexcept{
assert_invariant(mMarkerCount > 0);
if (mContext->isDebugUtilsSupported()) {
vkCmdEndDebugUtilsLabelEXT(mBuffer);
} else if (mContext->isDebugMarkersSupported()) {
vkCmdDebugMarkerEndEXT(mBuffer);
for (size_t i = 0; i < CAPACITY; ++i) {
mStorage[i] = std::make_unique<VulkanCommandBuffer>(allocator, mDevice, mPool);
}
mMarkerCount--;
#if !FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
(void) mContext;
#endif
}
void VulkanCommandBuffer::insertEvent(char const* marker) noexcept {
if (mContext->isDebugUtilsSupported()) {
VkDebugUtilsLabelEXT labelInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = marker,
.color = {1, 1, 0, 1},
};
vkCmdInsertDebugUtilsLabelEXT(mBuffer, &labelInfo);
} else if (mContext->isDebugMarkersSupported()) {
VkDebugMarkerMarkerInfoEXT markerInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
.pMarkerName = marker,
.color = {0.0f, 1.0f, 0.0f, 1.0f},
};
vkCmdDebugMarkerInsertEXT(mBuffer, &markerInfo);
void VulkanCommands::terminate() {
wait();
gc();
vkDestroyCommandPool(mDevice, mPool, VKALLOC);
for (VkSemaphore sema: mSubmissionSignals) {
vkDestroySemaphore(mDevice, sema, VKALLOC);
}
for (VkFence fence: mFences) {
vkDestroyFence(mDevice, fence, VKALLOC);
}
}
void VulkanCommandBuffer::begin() noexcept {
// Begin writing into the command buffer.
VkCommandBufferBeginInfo const binfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(mBuffer, &binfo);
}
VkSemaphore VulkanCommandBuffer::submit() {
while (mMarkerCount > 0) {
popMarker();
}
vkEndCommandBuffer(mBuffer);
VkPipelineStageFlags const waitDestStageMasks[2] = {
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
};
VkSubmitInfo submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = mWaitSemaphores.size(),
.pWaitSemaphores = mWaitSemaphores.data(),
.pWaitDstStageMask = waitDestStageMasks,
.commandBufferCount = 1u,
.pCommandBuffers = &mBuffer,
.signalSemaphoreCount = 1u,
.pSignalSemaphores = &mSubmission,
};
// add submit protection if needed
VkProtectedSubmitInfo protectedSubmitInfo{
.sType = VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO,
.protectedSubmit = VK_TRUE,
};
if (isProtected) {
submitInfo.pNext = &protectedSubmitInfo;
VulkanCommandBuffer& VulkanCommands::get() {
if (mCurrentCommandBufferIndex >= 0) {
return *mStorage[mCurrentCommandBufferIndex].get();
}
// If we ran out of available command buffers, stall until one finishes. This is very rare.
// It occurs only when Filament invokes commit() or endFrame() a large number of times without
// presenting the swap chain or waiting on a fence.
while (mAvailableBufferCount == 0) {
#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
FVK_LOGI << "Submitting cmdbuffer=" << mBuffer
<< " wait=(";
for (size_t s = 0, count = mWaitSemaphores.size(); s < count; ++s) {
FVK_LOGI << mWaitSemaphores[s] << " ";
FVK_LOGI << "VulkanCommands has stalled. "
<< "If this occurs frequently, consider increasing VK_MAX_COMMAND_BUFFERS."
<< io::endl;
#endif
wait();
gc();
}
VulkanCommandBuffer* currentbuf = nullptr;
// Find an available slot.
for (size_t i = 0; i < CAPACITY; ++i) {
auto wrapper = mStorage[i].get();
if (wrapper->buffer() == VK_NULL_HANDLE) {
mCurrentCommandBufferIndex = static_cast<int8_t>(i);
currentbuf = wrapper;
break;
}
}
assert_invariant(currentbuf);
mAvailableBufferCount--;
// Note that the fence wrapper uses shared_ptr because a DriverAPI fence can also have ownership
// over it. The destruction of the low-level fence occurs either in VulkanCommands::gc(), or in
// VulkanDriver::destroyFence(), both of which are safe spots.
currentbuf->fence = std::make_shared<VulkanCmdFence>(mFences[mCurrentCommandBufferIndex]);
// Begin writing into the command buffer.
const VkCommandBufferBeginInfo binfo{
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
vkBeginCommandBuffer(currentbuf->buffer(), &binfo);
// Notify the observer that a new command buffer has been activated.
if (mObserver) {
mObserver->onCommandBuffer(*currentbuf);
}
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
// We push the current markers onto a temporary stack. This must be placed after currentbuf is
// set to the new command buffer since pushGroupMarker also calls get().
while (mCarriedOverMarkers && !mCarriedOverMarkers->empty()) {
auto [marker, time] = mCarriedOverMarkers->pop();
pushGroupMarker(marker.c_str(), time);
}
#endif
return *currentbuf;
}
bool VulkanCommands::flush() {
// It's perfectly fine to call flush when no commands have been written.
if (mCurrentCommandBufferIndex < 0) {
return false;
}
// Before actually submitting, we need to pop any leftover group markers.
// Note that this needs to occur before vkEndCommandBuffer.
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
while (mGroupMarkers && !mGroupMarkers->empty()) {
if (!mCarriedOverMarkers) {
mCarriedOverMarkers = std::make_unique<VulkanGroupMarkers>();
}
auto const [marker, time] = mGroupMarkers->top();
mCarriedOverMarkers->push(marker, time);
// We still need to call through to vkCmdEndDebugUtilsLabelEXT.
popGroupMarker();
}
FVK_LOGI << ") "
<< " signal=" << mSubmission
<< " fence=" << mFence << utils::io::endl;
#endif
mFenceStatus->setStatus(VK_NOT_READY);
UTILS_UNUSED_IN_RELEASE VkResult result =
vkQueueSubmit(mQueue, 1, &submitInfo, mFence);
int8_t const index = mCurrentCommandBufferIndex;
VulkanCommandBuffer const* currentbuf = mStorage[index].get();
VkSemaphore const renderingFinished = mSubmissionSignals[index];
vkEndCommandBuffer(currentbuf->buffer());
// If the injected semaphore is an "image available" semaphore that has not yet been signaled,
// it is sometimes fine to start executing commands anyway, as long as we stall the GPU at the
// VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT stage. However we need to assume the worst
// here and use VK_PIPELINE_STAGE_ALL_COMMANDS_BIT. This is a more aggressive stall, but it is
// the only safe option because the previously submitted command buffer might have set up some
// state that the new command buffer depends on.
VkPipelineStageFlags waitDestStageMasks[2] = {
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
};
VkSemaphore signals[2] = {
VK_NULL_HANDLE,
VK_NULL_HANDLE,
};
uint32_t waitSemaphoreCount = 0;
if (mSubmissionSignal) {
signals[waitSemaphoreCount++] = mSubmissionSignal;
}
if (mInjectedSignal) {
signals[waitSemaphoreCount++] = mInjectedSignal;
}
VkCommandBuffer const cmdbuffer = currentbuf->buffer();
VkSubmitInfo submitInfo{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.waitSemaphoreCount = waitSemaphoreCount,
.pWaitSemaphores = waitSemaphoreCount > 0 ? signals : nullptr,
.pWaitDstStageMask = waitDestStageMasks,
.commandBufferCount = 1,
.pCommandBuffers = &cmdbuffer,
.signalSemaphoreCount = 1u,
.pSignalSemaphores = &renderingFinished,
};
#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
FVK_LOGI << "Submitting cmdbuffer=" << cmdbuffer
<< " wait=(" << signals[0] << ", " << signals[1] << ") "
<< " signal=" << renderingFinished
<< " fence=" << currentbuf->fence->fence
<< utils::io::endl;
#endif
auto& cmdfence = currentbuf->fence;
std::unique_lock<utils::Mutex> lock(cmdfence->mutex);
cmdfence->status.store(VK_NOT_READY);
UTILS_UNUSED_IN_RELEASE VkResult result = vkQueueSubmit(mQueue, 1, &submitInfo, cmdfence->fence);
cmdfence->condition.notify_all();
lock.unlock();
#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
if (result != VK_SUCCESS) {
@@ -228,298 +309,175 @@ VkSemaphore VulkanCommandBuffer::submit() {
}
#endif
assert_invariant(result == VK_SUCCESS);
mWaitSemaphores.clear();
return mSubmission;
}
// Creates the VkCommandPool for the given queue family and pre-allocates CAPACITY command buffers
// out of it. No buffer is recording after construction (mRecording == INVALID).
//
//   context, queue   - forwarded unchanged to every VulkanCommandBuffer.
//   device           - device that owns the pool; kept in mDevice for later fence/pool calls.
//   queueFamilyIndex - queue family the pool's command buffers will be submitted to.
//   isProtected      - when true the pool is created with VK_COMMAND_POOL_CREATE_PROTECTED_BIT and
//                      the flag is forwarded to every VulkanCommandBuffer.
CommandBufferPool::CommandBufferPool(VulkanContext* context, VkDevice device, VkQueue queue,
        uint8_t queueFamilyIndex, bool isProtected)
    : mDevice(device),
      mRecording(INVALID) {
    VkCommandPoolCreateInfo const createInfo = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT |
                 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                 (isProtected ? VK_COMMAND_POOL_CREATE_PROTECTED_BIT : 0u),
        .queueFamilyIndex = queueFamilyIndex,
    };

    // A failed pool creation would leave mPool unset while every buffer below (and the
    // destructor) still uses it, so surface the failure in debug builds instead of ignoring it.
    UTILS_UNUSED_IN_RELEASE VkResult const result =
            vkCreateCommandPool(device, &createInfo, VKALLOC, &mPool);
    assert_invariant(result == VK_SUCCESS);

    // The number of buffers is known up front; reserve once instead of growing incrementally.
    mBuffers.reserve(CAPACITY);
    for (size_t i = 0; i < CAPACITY; ++i) {
        mBuffers.emplace_back(
                std::make_unique<VulkanCommandBuffer>(context, device, queue, mPool, isProtected));
    }
}
// Tears the pool down. The order matters: outstanding work is drained first, the finished
// buffers are reset, and only then is the VkCommandPool destroyed.
CommandBufferPool::~CommandBufferPool() {
// Block until the fences of all submitted buffers have signaled, then refresh their status.
wait();
// Reset every submitted buffer whose status is now VK_SUCCESS and clear its "submitted" bit.
gc();
// Release the pool that was created in the constructor.
vkDestroyCommandPool(mDevice, mPool, VKALLOC);
}
// Returns the command buffer that is currently recording. If none is, the first buffer that is
// not marked as submitted is selected and begun; when every buffer is still submitted, this
// blocks (wait + gc) until one can be reused.
VulkanCommandBuffer& CommandBufferPool::getRecording() {
    if (isRecording()) {
        return *mBuffers[mRecording];
    }

    for (;;) {
        // Look for the lowest slot that has not been submitted.
        int8_t slot = INVALID;
        for (int8_t i = 0; i < CAPACITY; ++i) {
            if (!mSubmitted[i]) {
                slot = i;
                break;
            }
        }
        mRecording = slot;
        if (slot != INVALID) {
            break;
        }
        // Everything is in flight: wait for the submitted work and reclaim finished buffers.
        wait();
        gc();
    }

    VulkanCommandBuffer& cmdbuf = *mBuffers[mRecording];
    cmdbuf.begin();

#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
    if (mGroupMarkers) {
        // Re-issue the open markers on the fresh buffer, bottom first, while rebuilding the
        // stack in the same order so that it is unchanged afterwards.
        auto replayed = std::make_unique<VulkanGroupMarkers>();
        while (!mGroupMarkers->empty()) {
            auto [label, start] = mGroupMarkers->pop_bottom();
            cmdbuf.pushMarker(label.c_str());
            replayed->push(label, start);
        }
        mGroupMarkers.swap(replayed);
    }
#endif
    return cmdbuf;
}
void CommandBufferPool::gc() {
ActiveBuffers reclaimed;
mSubmitted.forEachSetBit([this,&reclaimed] (size_t index) {
auto& buffer = mBuffers[index];
if (buffer->getStatus() == VK_SUCCESS) {
reclaimed.set(index, true);
buffer->reset();
}
});
mSubmitted &= ~reclaimed;
}
void CommandBufferPool::update() {
mSubmitted.forEachSetBit([this] (size_t index) {
auto& buffer = mBuffers[index];
VkResult status = vkGetFenceStatus(mDevice, buffer->getVkFence());
if (status == VK_SUCCESS) {
buffer->setComplete();
}
});
}
// Submits the buffer that is currently recording and returns the semaphore produced by that
// submission. Returns VK_NULL_HANDLE when nothing is being recorded.
VkSemaphore CommandBufferPool::flush() {
    if (mRecording == INVALID) {
        return VK_NULL_HANDLE;
    }
    int8_t const slot = mRecording;
    VkSemaphore const submitted = mBuffers[slot]->submit();
    // The buffer is now in flight, and the pool no longer has a recording buffer.
    mSubmitted.set(slot, true);
    mRecording = INVALID;
    return submitted;
}
// Blocks until the fences of all submitted command buffers have signaled, then refreshes the
// completion status of those buffers via update(). Does nothing when no buffer is submitted.
void CommandBufferPool::wait() {
    uint8_t count = 0;
    VkFence fences[CAPACITY];
    mSubmitted.forEachSetBit([this, &count, &fences](size_t index) {
        fences[count++] = mBuffers[index]->getVkFence();
    });

    // vkWaitForFences requires fenceCount > 0. With nothing submitted (e.g. when the destructor
    // runs on an idle pool) there is nothing to wait for, and update() would have no submitted
    // buffers to visit either.
    if (count == 0) {
        return;
    }

    vkWaitForFences(mDevice, count, fences, VK_TRUE, UINT64_MAX);
    update();
}
// Makes the currently recording buffer wait on `previousAction` when it is submitted. Has no
// effect when no buffer is recording.
void CommandBufferPool::waitFor(VkSemaphore previousAction) {
    if (isRecording()) {
        mBuffers[mRecording]->insertWait(previousAction);
    }
}
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
// Returns the name of the innermost open group marker, or an empty string when there is none.
std::string CommandBufferPool::topMarker() const {
    if (mGroupMarkers && !mGroupMarkers->empty()) {
        return mGroupMarkers->top().first;
    }
    return {};
}
void CommandBufferPool::pushMarker(char const* marker, VulkanGroupMarkers::Timestamp timestamp) {
if (!mGroupMarkers) {
mGroupMarkers = std::make_unique<VulkanGroupMarkers>();
}
mGroupMarkers->push(marker, timestamp);
getRecording().pushMarker(marker);
}
std::pair<std::string, VulkanGroupMarkers::Timestamp> CommandBufferPool::popMarker() {
assert_invariant(mGroupMarkers && !mGroupMarkers->empty());
auto ret = mGroupMarkers->pop();
// Note that if we're popping a marker while not recording, we would just pop the conceptual
// stack of marker (i.e. mGroupMarkers) and not carry out the pop on the command buffer.
if (isRecording()) {
getRecording().popMarker();
}
return ret;
}
void CommandBufferPool::insertEvent(char const* marker) {
getRecording().insertEvent(marker);
}
#endif // FVK_DEBUG_GROUP_MARKERS
// Builds the command manager. Only the regular (non-protected) CommandBufferPool is created
// here; the protected queue and its family index are merely stored so that getProtected() can
// create the protected pool on first use.
// NOTE(review): queueFamilyIndex is a uint32_t here but CommandBufferPool's constructor takes a
// uint8_t, so the value is implicitly narrowed when it is forwarded - confirm this is intended.
VulkanCommands::VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
VkQueue protectedQueue, uint32_t protectedQueueFamilyIndex, VulkanContext* context)
: mDevice(device),
mProtectedQueue(protectedQueue),
mProtectedQueueFamilyIndex(protectedQueueFamilyIndex),
mContext(context),
// The final `false` is the pool's isProtected argument.
mPool(std::make_unique<CommandBufferPool>(context, device, queue, queueFamilyIndex, false)) {}
// Destroys both command buffer pools (regular first, then protected). After this call flush()
// and wait() see no pools.
void VulkanCommands::terminate() {
    mPool = nullptr;
    mProtectedPool = nullptr;
}
// Returns the recording command buffer of the regular pool, starting one if none is recording.
VulkanCommandBuffer& VulkanCommands::get() {
    return mPool->getRecording();
}
// Returns the recording command buffer of the protected pool. The pool is created on first use
// and requires that a protected queue was supplied at construction.
VulkanCommandBuffer& VulkanCommands::getProtected() {
    assert_invariant(mProtectedQueue != VK_NULL_HANDLE);

    if (mProtectedPool == nullptr) {
        // Same construction as the regular pool, but on the protected queue and with
        // isProtected set to true.
        mProtectedPool = std::make_unique<CommandBufferPool>(mContext, mDevice, mProtectedQueue,
                mProtectedQueueFamilyIndex, true);
    }
    return mProtectedPool->getRecording();
}
bool VulkanCommands::flush() {
// It's possible to call flush and wait at "terminate", in which case, we'll just return.
if (!mPool && !mProtectedPool) {
return false;
}
VkSemaphore dependency = mInjectedDependency;
VkSemaphore lastSubmit = mLastSubmit;
bool hasFlushed = false;
// Note that we've ordered it so that the non-protected commands are followed by the protected
// commands. This assumes that the protected commands will be the ones doing the rendering into
// the protected memory (i.e. protected render target).
for (auto pool: {mPool.get(), mProtectedPool.get()}) {
if (!pool || !pool->isRecording()) {
continue;
}
if (dependency != VK_NULL_HANDLE) {
pool->waitFor(dependency);
}
if (lastSubmit != VK_NULL_HANDLE) {
pool->waitFor(lastSubmit);
lastSubmit = VK_NULL_HANDLE;
}
dependency = pool->flush();
hasFlushed = true;
}
if (hasFlushed) {
mInjectedDependency = VK_NULL_HANDLE;
mLastSubmit = dependency;
}
mSubmissionSignal = renderingFinished;
mInjectedSignal = VK_NULL_HANDLE;
mCurrentCommandBufferIndex = -1;
return true;
}
// Hands the stored submission signal to the caller and clears it, so the same semaphore is not
// returned by a second call.
VkSemaphore VulkanCommands::acquireFinishedSignal() {
    VkSemaphore const acquired = mSubmissionSignal;
#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
    FVK_LOGI << "Acquiring " << acquired << " (e.g. for vkQueuePresentKHR)" << io::endl;
#endif
    mSubmissionSignal = VK_NULL_HANDLE;
    return acquired;
}
// Stores `next` as the injected signal. Only one injected semaphore may be pending at a time,
// which is asserted in debug builds.
void VulkanCommands::injectDependency(VkSemaphore next) {
    assert_invariant(mInjectedSignal == VK_NULL_HANDLE);
#if FVK_ENABLED(FVK_DEBUG_COMMAND_BUFFER)
    FVK_LOGI << "Injecting " << next << " (e.g. due to vkAcquireNextImageKHR)" << io::endl;
#endif
    mInjectedSignal = next;
}
void VulkanCommands::wait() {
// It's possible to call flush and wait at "terminate", in which case, we'll just return.
if (!mPool && !mProtectedPool) {
return;
VkFence fences[CAPACITY];
size_t count = 0;
for (size_t i = 0; i < CAPACITY; i++) {
auto wrapper = mStorage[i].get();
if (wrapper->buffer() != VK_NULL_HANDLE
&& mCurrentCommandBufferIndex != static_cast<int8_t>(i)) {
fences[count++] = wrapper->fence->fence;
}
}
FVK_SYSTRACE_CONTEXT();
FVK_SYSTRACE_START("commands::wait");
mPool->wait();
if (mProtectedPool) {
mProtectedPool->wait();
if (count > 0) {
vkWaitForFences(mDevice, count, fences, VK_TRUE, UINT64_MAX);
updateFences();
}
FVK_SYSTRACE_END();
}
void VulkanCommands::gc() {
FVK_SYSTRACE_CONTEXT();
FVK_SYSTRACE_START("commands::gc");
mPool->gc();
if (mProtectedPool) {
mProtectedPool->gc();
VkFence fences[CAPACITY];
size_t count = 0;
for (size_t i = 0; i < CAPACITY; i++) {
auto wrapper = mStorage[i].get();
if (wrapper->buffer() == VK_NULL_HANDLE) {
continue;
}
VkResult const result = vkGetFenceStatus(mDevice, wrapper->fence->fence);
if (result != VK_SUCCESS) {
continue;
}
fences[count++] = wrapper->fence->fence;
wrapper->fence->status.store(VK_SUCCESS);
wrapper->reset();
mAvailableBufferCount++;
}
if (count > 0) {
vkResetFences(mDevice, count, fences);
}
FVK_SYSTRACE_END();
}
void VulkanCommands::updateFences() {
mPool->update();
if (mProtectedPool) {
mProtectedPool->update();
for (size_t i = 0; i < CAPACITY; i++) {
auto wrapper = mStorage[i].get();
if (wrapper->buffer() != VK_NULL_HANDLE) {
VulkanCmdFence* fence = wrapper->fence.get();
if (fence) {
VkResult status = vkGetFenceStatus(mDevice, fence->fence);
// This is either VK_SUCCESS, VK_NOT_READY, or VK_ERROR_DEVICE_LOST.
fence->status.store(status, std::memory_order_relaxed);
}
}
}
}
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
void VulkanCommands::pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp) {
mPool->pushMarker(str, timestamp);
if (mProtectedPool) {
mProtectedPool->pushMarker(str, timestamp);
}
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
FVK_LOGD << "----> " << str << utils::io::endl;
// If the timestamp is not 0, then we are carrying over a marker across buffer submits.
// If it is 0, then this is a normal marker push and we should just print debug line as usual.
if (timestamp.time_since_epoch().count() == 0.0) {
FVK_LOGD << "----> " << str << utils::io::endl;
}
#endif
// TODO: Add group marker color to the Driver API
VkCommandBuffer const cmdbuffer = get().buffer();
if (!mGroupMarkers) {
mGroupMarkers = std::make_unique<VulkanGroupMarkers>();
}
mGroupMarkers->push(str, timestamp);
if (mContext->isDebugUtilsSupported()) {
VkDebugUtilsLabelEXT labelInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = str,
.color = {0, 1, 0, 1},
};
vkCmdBeginDebugUtilsLabelEXT(cmdbuffer, &labelInfo);
} else if (mContext->isDebugMarkersSupported()) {
VkDebugMarkerMarkerInfoEXT markerInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
.pMarkerName = str,
.color = {0.0f, 1.0f, 0.0f, 1.0f},
};
vkCmdDebugMarkerBeginEXT(cmdbuffer, &markerInfo);
}
}
void VulkanCommands::popGroupMarker() {
assert_invariant(mGroupMarkers);
if (!mGroupMarkers->empty()) {
VkCommandBuffer const cmdbuffer = get().buffer();
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
auto ret = mPool->popMarker();
auto const& marker = ret.first;
auto const& startTime = ret.second;
auto const endTime = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = endTime - startTime;
FVK_LOGD << "<---- " << marker << " elapsed: " << (diff.count() * 1000) << " ms"
<< utils::io::endl;
auto const [marker, startTime] = mGroupMarkers->pop();
auto const endTime = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = endTime - startTime;
FVK_LOGD << "<---- " << marker << " elapsed: " << (diff.count() * 1000) << " ms"
<< utils::io::endl;
#else
mPool->popMarker();
#endif // FVK_DEBUG_PRINT_GROUP_MARKERS
if (mProtectedPool) {
mProtectedPool->popMarker();
mGroupMarkers->pop();
#endif
if (mContext->isDebugUtilsSupported()) {
vkCmdEndDebugUtilsLabelEXT(cmdbuffer);
} else if (mContext->isDebugMarkersSupported()) {
vkCmdDebugMarkerEndEXT(cmdbuffer);
}
} else if (mCarriedOverMarkers && !mCarriedOverMarkers->empty()) {
// It could be that pop is called between flush() and get() (new command buffer), in which
// case the marker is in "carried over" state, we'd just remove that. Since the
// mCarriedOverMarkers is in the opposite order, we pop the bottom instead of the top.
mCarriedOverMarkers->pop_bottom();
}
}
void VulkanCommands::insertEventMarker(char const* str, uint32_t len) {
mPool->insertEvent(str);
if (mProtectedPool) {
mProtectedPool->insertEvent(str);
void VulkanCommands::insertEventMarker(char const* string, uint32_t len) {
VkCommandBuffer const cmdbuffer = get().buffer();
if (mContext->isDebugUtilsSupported()) {
VkDebugUtilsLabelEXT labelInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
.pLabelName = string,
.color = {1, 1, 0, 1},
};
vkCmdInsertDebugUtilsLabelEXT(cmdbuffer, &labelInfo);
} else if (mContext->isDebugMarkersSupported()) {
VkDebugMarkerMarkerInfoEXT markerInfo = {
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
.pMarkerName = string,
.color = {0.0f, 1.0f, 0.0f, 1.0f},
};
vkCmdDebugMarkerInsertEXT(cmdbuffer, &markerInfo);
}
}
std::string VulkanCommands::getTopGroupMarker() const {
if (mProtectedPool) {
return mProtectedPool->topMarker();
if (!mGroupMarkers || mGroupMarkers->empty()) {
return "";
}
return mPool->topMarker();
return std::get<0>(mGroupMarkers->top());
}
#endif // FVK_DEBUG_GROUP_MARKERS

View File

@@ -21,10 +21,8 @@
#include "DriverBase.h"
#include "VulkanAsyncHandles.h"
#include "VulkanConstants.h"
#include "VulkanUtility.h"
#include "vulkan/memory/ResourcePointer.h"
#include "VulkanResources.h"
#include <utils/Condition.h>
#include <utils/FixedCapacityVector.h>
@@ -39,8 +37,6 @@
namespace filament::backend {
using namespace fvkmemory;
struct VulkanContext;
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
@@ -51,122 +47,79 @@ public:
void push(std::string const& marker, Timestamp start = {}) noexcept;
std::pair<std::string, Timestamp> pop() noexcept;
std::pair<std::string, Timestamp> pop_bottom() noexcept;
std::pair<std::string, Timestamp> const& top() const;
std::pair<std::string, Timestamp> top() const;
bool empty() const noexcept;
private:
std::list<std::pair<std::string, Timestamp>> mMarkers;
std::list<std::string> mMarkers;
#if FVK_ENABLED(FVK_DEBUG_PRINT_GROUP_MARKERS)
std::list<Timestamp> mTimestamps;
#endif
};
#endif // FVK_DEBUG_GROUP_MARKERS
// Wrapper to enable use of shared_ptr for implementing shared ownership of low-level Vulkan fences.
// Bundles a VkFence with the state needed to observe its completion from CPU code.
struct VulkanCmdFence {
// Wraps an existing fence handle.
// NOTE(review): the initial value of `status` is set in the out-of-line definition, which is
// not visible here.
VulkanCmdFence(VkFence ifence);
// Defaulted: nothing is released here, so the VkFence handle is presumably destroyed
// elsewhere - TODO confirm.
~VulkanCmdFence() = default;
// The underlying Vulkan fence handle.
VkFence fence;
// Condition variable for threads waiting on this fence.
// NOTE(review): presumably used together with `mutex` - confirm at the wait sites.
utils::Condition condition;
// Mutex guarding the transition of `status`.
utils::Mutex mutex;
// Last known fence state, readable without taking `mutex`.
std::atomic<VkResult> status;
};
// The submission fence has shared ownership semantics because it is potentially wrapped by a
// DriverApi fence object and should not be destroyed until both the DriverApi object is freed and
// we're done waiting on the most recent submission of the given command buffer.
struct VulkanCommandBuffer {
VulkanCommandBuffer(VulkanContext* mContext,
VkDevice device, VkQueue queue, VkCommandPool pool, bool isProtected);
VulkanCommandBuffer(VulkanResourceAllocator* allocator, VkDevice device, VkCommandPool pool);
VulkanCommandBuffer(VulkanCommandBuffer const&) = delete;
VulkanCommandBuffer& operator=(VulkanCommandBuffer const&) = delete;
~VulkanCommandBuffer();
inline void acquire(fvkmemory::resource_ptr<fvkmemory::Resource> resource) {
mResources.push_back(resource);
inline void acquire(VulkanResource* resource) {
mResourceManager.acquire(resource);
}
void reset() noexcept;
inline void insertWait(VkSemaphore sem) {
mWaitSemaphores.insert(sem);
inline void acquire(VulkanAcquireOnlyResourceManager* srcResources) {
mResourceManager.acquireAll(srcResources);
}
void pushMarker(char const* marker) noexcept;
void popMarker() noexcept;
void insertEvent(char const* marker) noexcept;
void begin() noexcept;
VkSemaphore submit();
inline void setComplete() {
mFenceStatus->setStatus(VK_SUCCESS);
inline void reset() {
fence.reset();
mResourceManager.clear();
mPipeline = VK_NULL_HANDLE;
}
VkResult getStatus() {
return mFenceStatus->getStatus();
inline void setPipeline(VkPipeline pipeline) {
mPipeline = pipeline;
}
std::shared_ptr<VulkanCmdFence> getFenceStatus() const {
return mFenceStatus;
inline VkPipeline pipeline() const {
return mPipeline;
}
VkFence getVkFence() const {
return mFence;
inline VkCommandBuffer buffer() const {
if (fence) {
return mBuffer;
}
return VK_NULL_HANDLE;
}
VkCommandBuffer buffer() const {
return mBuffer;
}
std::shared_ptr<VulkanCmdFence> fence;
private:
VulkanContext* mContext;
uint8_t mMarkerCount;
bool const isProtected;
VkDevice mDevice;
VkQueue mQueue;
CappedArray<VkSemaphore, 2> mWaitSemaphores;
VulkanAcquireOnlyResourceManager mResourceManager;
VkCommandBuffer mBuffer;
VkSemaphore mSubmission;
VkFence mFence;
std::shared_ptr<VulkanCmdFence> mFenceStatus;
std::vector<fvkmemory::resource_ptr<Resource>> mResources;
VkPipeline mPipeline;
};
struct CommandBufferPool {
using ActiveBuffers = utils::bitset64;
static constexpr int8_t INVALID = -1;
CommandBufferPool(VulkanContext* context, VkDevice device, VkQueue queue,
uint8_t queueFamilyIndex, bool isProtected);
~CommandBufferPool();
VulkanCommandBuffer& getRecording();
void gc();
void update();
VkSemaphore flush();
void wait();
void waitFor(VkSemaphore previousAction);
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
std::string topMarker() const;
void pushMarker(char const* marker, VulkanGroupMarkers::Timestamp timestamp);
std::pair<std::string, VulkanGroupMarkers::Timestamp> popMarker();
void insertEvent(char const* marker);
#endif
inline bool isRecording() const { return mRecording != INVALID; }
private:
static constexpr int CAPACITY = FVK_MAX_COMMAND_BUFFERS;
// int8 only goes up to 127, therefore capacity must be less than that.
static_assert(CAPACITY < 128);
// The number of bits in ActiveBuffers describe the usage of the buffers in the pool, so must be
// larger than the size of the pool.
static_assert(sizeof(ActiveBuffers) * 8 >= CAPACITY);
using BufferList = utils::FixedCapacityVector<std::unique_ptr<VulkanCommandBuffer>>;
VkDevice mDevice;
VkCommandPool mPool;
ActiveBuffers mSubmitted;
std::vector<std::unique_ptr<VulkanCommandBuffer>> mBuffers;
int8_t mRecording;
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
std::unique_ptr<VulkanGroupMarkers> mGroupMarkers;
#endif
// Allows classes to be notified after a new command buffer has been activated.
class CommandBufferObserver {
public:
virtual void onCommandBuffer(const VulkanCommandBuffer& cmdbuffer) = 0;
virtual ~CommandBufferObserver();
};
// Manages a set of command buffers and semaphores, exposing an API that is significantly simpler
@@ -180,6 +133,9 @@ private:
// - This creates a guarantee of in-order execution.
// - Semaphores are recycled to prevent create / destroy churn.
//
// - Notifies listeners when recording begins in a new VkCommandBuffer.
// - Used by PipelineCache so that it knows when to clear out its shadow state.
//
// - Allows 1 user to inject a "dependency" semaphore that stalls the next flush.
// - This is used for asynchronous acquisition of a swap chain image, since the GPU
// might require a valid swap chain image when it starts executing the command buffer.
@@ -195,17 +151,13 @@ private:
class VulkanCommands {
public:
VulkanCommands(VkDevice device, VkQueue queue, uint32_t queueFamilyIndex,
VkQueue protectedQueue, uint32_t protectedQueueFamilyIndex, VulkanContext* context);
VulkanContext* context, VulkanResourceAllocator* allocator);
void terminate();
// Creates a "current" command buffer if none exists, otherwise returns the current one.
VulkanCommandBuffer& get();
// Creates a "current" protected capable command buffer if none exists, otherwise
// returns the current one.
VulkanCommandBuffer& getProtected();
// Submits the current command buffer if it exists, then sets "current" to null.
// If there are no outstanding commands then nothing happens and this returns false.
bool flush();
@@ -213,17 +165,11 @@ public:
// Returns the "rendering finished" semaphore for the most recent flush and removes
// it from the existing dependency chain. This is especially useful for setting up
// vkQueuePresentKHR.
VkSemaphore acquireFinishedSignal() {
VkSemaphore ret= mLastSubmit;
mLastSubmit = VK_NULL_HANDLE;
return ret;
}
VkSemaphore acquireFinishedSignal();
// Takes a semaphore that signals when the next flush can occur. Only one injected
// semaphore is allowed per flush. Useful after calling vkAcquireNextImageKHR.
void injectDependency(VkSemaphore next) {
mInjectedDependency = next;
}
void injectDependency(VkSemaphore next);
// Destroys all command buffers that are no longer in use.
void gc();
@@ -234,25 +180,42 @@ public:
// Updates the atomic "status" variable in every extant fence.
void updateFences();
// Sets an observer who is notified every time a new command buffer has been made "current".
// The observer's event handler can only be called during get().
void setObserver(CommandBufferObserver* observer) { mObserver = observer; }
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
void pushGroupMarker(char const* str, VulkanGroupMarkers::Timestamp timestamp = {});
void popGroupMarker();
void insertEventMarker(char const* string, uint32_t len);
std::string getTopGroupMarker() const;
#endif
private:
static constexpr int CAPACITY = FVK_MAX_COMMAND_BUFFERS;
VkDevice const mDevice;
VkQueue const mProtectedQueue;
// For deferred initialization if/when we need protected content
uint32_t const mProtectedQueueFamilyIndex;
VulkanContext* mContext;
VkQueue const mQueue;
VkCommandPool const mPool;
VulkanContext const* mContext;
std::unique_ptr<CommandBufferPool> mPool;
std::unique_ptr<CommandBufferPool> mProtectedPool;
// int8 only goes up to 127, therefore capacity must be less than that.
static_assert(CAPACITY < 128);
int8_t mCurrentCommandBufferIndex = -1;
VkSemaphore mSubmissionSignal = {};
VkSemaphore mInjectedSignal = {};
utils::FixedCapacityVector<std::unique_ptr<VulkanCommandBuffer>> mStorage;
VkFence mFences[CAPACITY] = {};
VkSemaphore mSubmissionSignals[CAPACITY] = {};
uint8_t mAvailableBufferCount = CAPACITY;
CommandBufferObserver* mObserver = nullptr;
VkSemaphore mInjectedDependency = VK_NULL_HANDLE;
VkSemaphore mLastSubmit = VK_NULL_HANDLE;
#if FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS)
std::unique_ptr<VulkanGroupMarkers> mGroupMarkers;
std::unique_ptr<VulkanGroupMarkers> mCarriedOverMarkers;
#endif
};
} // namespace filament::backend

View File

@@ -1,7 +1,7 @@
/*
* Copyright (C) 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
@@ -63,7 +63,7 @@
#define FVK_DEBUG_SHADER_MODULE 0x00000800
#define FVK_DEBUG_READ_PIXELS 0x00001000
#define FVK_DEBUG_PIPELINE_CACHE 0x00002000
#define FVK_DEBUG_STAGING_ALLOCATION 0x00004000
#define FVK_DEBUG_ALLOCATION 0x00004000
// Enable the debug utils extension if it is available.
#define FVK_DEBUG_DEBUG_UTILS 0x00008000
@@ -77,12 +77,8 @@
// order of calls).
#define FVK_DEBUG_FORCE_LOG_TO_I 0x00020000
// Enable a minimal set of traces to assess the performance of the backend.
// All other debug features must be disabled.
#define FVK_DEBUG_PROFILING 0x00040000
// Useful default combinations
#define FVK_DEBUG_EVERYTHING (0xFFFFFFFF & ~FVK_DEBUG_PROFILING)
#define FVK_DEBUG_EVERYTHING 0xFFFFFFFF
#define FVK_DEBUG_PERFORMANCE \
FVK_DEBUG_SYSTRACE
@@ -98,12 +94,6 @@
#define FVK_DEBUG_FLAGS 0
#endif
// Override the debug flags if we are forcing profiling mode
#if defined(FILAMENT_FORCE_PROFILING_MODE)
#undef FVK_DEBUG_FLAGS
#define FVK_DEBUG_FLAGS (FVK_DEBUG_PROFILING)
#endif
#define FVK_ENABLED(flags) (((FVK_DEBUG_FLAGS) & (flags)) == (flags))
// Group marker works only if validation or debug utils is enabled since it uses
@@ -122,10 +112,6 @@ static_assert(FVK_ENABLED(FVK_DEBUG_GROUP_MARKERS));
static_assert(FVK_ENABLED(FVK_DEBUG_VALIDATION));
#endif
#if FVK_ENABLED(FVK_DEBUG_PROFILING) && FVK_DEBUG_FLAGS != FVK_DEBUG_PROFILING
#error PROFILING is exclusive; all other debug features must be disabled.
#endif
// end dependency checks
// Shorthand for combination of enabled debug flags
@@ -137,34 +123,17 @@ static_assert(FVK_ENABLED(FVK_DEBUG_VALIDATION));
// end shorthands
#if FVK_DEBUG_FLAGS == FVK_DEBUG_PROFILING
#if FVK_ENABLED(FVK_DEBUG_SYSTRACE)
#ifndef NDEBUG
#error PROFILING is meaningless in DEBUG mode.
#endif
#define FVK_SYSTRACE_CONTEXT()
#define FVK_SYSTRACE_START(marker)
#define FVK_SYSTRACE_END()
#define FVK_SYSTRACE_SCOPE()
#define FVK_PROFILE_MARKER(marker) PROFILE_SCOPE(marker)
#elif FVK_ENABLED(FVK_DEBUG_SYSTRACE)
#include <utils/Systrace.h>
#define FVK_SYSTRACE_CONTEXT() SYSTRACE_CONTEXT()
#define FVK_SYSTRACE_START(marker) SYSTRACE_NAME_BEGIN(marker)
#define FVK_SYSTRACE_END() SYSTRACE_NAME_END()
#define FVK_SYSTRACE_SCOPE() SYSTRACE_NAME(__func__)
#define FVK_PROFILE_MARKER(marker) FVK_SYSTRACE_SCOPE()
#include <utils/Systrace.h>
#define FVK_SYSTRACE_CONTEXT() SYSTRACE_CONTEXT()
#define FVK_SYSTRACE_START(marker) SYSTRACE_NAME_BEGIN(marker)
#define FVK_SYSTRACE_END() SYSTRACE_NAME_END()
#else
#define FVK_SYSTRACE_CONTEXT()
#define FVK_SYSTRACE_START(marker)
#define FVK_SYSTRACE_END()
#define FVK_SYSTRACE_SCOPE()
#define FVK_PROFILE_MARKER(marker)
#define FVK_SYSTRACE_CONTEXT()
#define FVK_SYSTRACE_START(marker)
#define FVK_SYSTRACE_END()
#endif
#ifndef FVK_HANDLE_ARENA_SIZE_IN_MB
@@ -196,16 +165,14 @@ constexpr static const int FVK_REQUIRED_VERSION_MINOR = 1;
// buffers that have been submitted but have not yet finished rendering. Note that Filament can
// issue multiple commit calls in a single frame, and that we use a triple buffered swap chain on
// some platforms.
//
// Heuristic: Triple Buffering (3) multiplied by maximum number of renderpasses (15).
constexpr static const int FVK_MAX_COMMAND_BUFFERS = 3 * 15;
constexpr static const int FVK_MAX_COMMAND_BUFFERS = 10;
// Number of command buffer submissions that should occur before an unused pipeline is removed
// from the cache.
//
// If this number is low, VkPipeline construction will occur frequently, which can
// be extremely slow. If this number is high, the memory footprint will be large.
constexpr static const int FVK_MAX_PIPELINE_AGE = FVK_MAX_COMMAND_BUFFERS;
constexpr static const int FVK_MAX_PIPELINE_AGE = 10;
// VulkanPipelineCache does not track which command buffers contain references to which pipelines,
// instead it simply waits for at least FVK_MAX_COMMAND_BUFFERS submissions to occur before

View File

@@ -25,11 +25,15 @@
#include <backend/PixelBufferDescriptor.h>
#include <utils/Panic.h>
#include <utils/FixedCapacityVector.h>
#include <algorithm> // for std::max
using namespace bluevk;
using utils::FixedCapacityVector;
namespace {
} // end anonymous namespace
@@ -82,7 +86,7 @@ VulkanTimestamps::VulkanTimestamps(VkDevice device) : mDevice(device) {
VkQueryPoolCreateInfo tqpCreateInfo = {
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.queryType = VK_QUERY_TYPE_TIMESTAMP,
};
};
std::unique_lock<utils::Mutex> lock(mMutex);
tqpCreateInfo.queryCount = mUsed.size() * 2;
VkResult result = vkCreateQueryPool(mDevice, &tqpCreateInfo, VKALLOC, &mPool);
@@ -97,7 +101,7 @@ std::tuple<uint32_t, uint32_t> VulkanTimestamps::getNextQuery() {
for (size_t timerIndex = 0; timerIndex < maxTimers; ++timerIndex) {
if (!mUsed.test(timerIndex)) {
mUsed.set(timerIndex);
return std::make_tuple(timerIndex * 2, timerIndex * 2 + 1);
return std::make_tuple(timerIndex * 2, timerIndex * 2 + 1);
}
}
FVK_LOGE << "More than " << maxTimers << " timers are not supported." << utils::io::endl;
@@ -109,7 +113,7 @@ void VulkanTimestamps::clearQuery(uint32_t queryIndex) {
}
void VulkanTimestamps::beginQuery(VulkanCommandBuffer const* commands,
fvkmemory::resource_ptr<VulkanTimerQuery> query) {
VulkanTimerQuery* query) {
uint32_t const index = query->getStartingQueryIndex();
auto const cmdbuffer = commands->buffer();
@@ -117,24 +121,23 @@ void VulkanTimestamps::beginQuery(VulkanCommandBuffer const* commands,
vkCmdWriteTimestamp(cmdbuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, mPool, index);
// We stash this because getResult might come before the query is actually processed.
query->setFence(commands->getFenceStatus());
query->setFence(commands->fence);
}
void VulkanTimestamps::endQuery(VulkanCommandBuffer const* commands,
fvkmemory::resource_ptr<VulkanTimerQuery> query) {
VulkanTimerQuery const* query) {
uint32_t const index = query->getStoppingQueryIndex();
vkCmdWriteTimestamp(commands->buffer(), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, mPool, index);
}
VulkanTimestamps::QueryResult VulkanTimestamps::getResult(
fvkmemory::resource_ptr<VulkanTimerQuery> query) {
VulkanTimestamps::QueryResult VulkanTimestamps::getResult(VulkanTimerQuery const* query) {
uint32_t const index = query->getStartingQueryIndex();
QueryResult result;
size_t const dataSize = result.size() * sizeof(uint64_t);
VkDeviceSize const stride = sizeof(uint64_t) * 2;
VkResult vkresult =
vkGetQueryPoolResults(mDevice, mPool, index, 2, dataSize, (void*) result.data(), stride,
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
vkGetQueryPoolResults(mDevice, mPool, index, 2, dataSize, (void*) result.data(),
stride, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
FILAMENT_CHECK_POSTCONDITION(vkresult == VK_SUCCESS || vkresult == VK_NOT_READY)
<< "vkGetQueryPoolResults error: " << static_cast<int32_t>(vkresult);
if (vkresult == VK_NOT_READY) {

View File

@@ -21,8 +21,6 @@
#include "VulkanImageUtility.h"
#include "VulkanUtility.h"
#include "vulkan/memory/ResourcePointer.h"
#include <utils/bitset.h>
#include <utils/FixedCapacityVector.h>
#include <utils/Mutex.h>
@@ -43,10 +41,11 @@ struct VulkanTimerQuery;
struct VulkanCommandBuffer;
struct VulkanAttachment {
fvkmemory::resource_ptr<VulkanTexture> texture;
VulkanTexture* texture = nullptr;
uint8_t level = 0;
uint8_t baseViewIndex = 0;
uint8_t layerCount = 1;
uint8_t layer = 0;
uint16_t layer = 0;
bool isDepth() const;
VkImage getImage() const;
@@ -72,11 +71,9 @@ public:
std::tuple<uint32_t, uint32_t> getNextQuery();
void clearQuery(uint32_t queryIndex);
void beginQuery(VulkanCommandBuffer const* commands,
fvkmemory::resource_ptr<VulkanTimerQuery> query);
void endQuery(VulkanCommandBuffer const* commands,
fvkmemory::resource_ptr<VulkanTimerQuery> query);
QueryResult getResult(fvkmemory::resource_ptr<VulkanTimerQuery> query);
void beginQuery(VulkanCommandBuffer const* commands, VulkanTimerQuery* query);
void endQuery(VulkanCommandBuffer const* commands, VulkanTimerQuery const* query);
QueryResult getResult(VulkanTimerQuery const* query);
private:
VkDevice mDevice;
@@ -86,9 +83,7 @@ private:
};
struct VulkanRenderPass {
// Between the begin and end command render pass we cache the command buffer
VulkanCommandBuffer* commandBuffer;
fvkmemory::resource_ptr<VulkanRenderTarget> renderTarget;
VulkanRenderTarget* renderTarget;
VkRenderPass renderPass;
RenderPassParams params;
int currentSubpass;
@@ -99,9 +94,6 @@ struct VulkanRenderPass {
struct VulkanContext {
public:
inline uint32_t selectMemoryType(uint32_t flags, VkFlags reqs) const {
if ((reqs & VK_MEMORY_PROPERTY_PROTECTED_BIT) != 0) {
assert_invariant(isProtectedMemorySupported() == true);
}
for (uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; i++) {
if (flags & 1) {
if ((mMemoryProperties.memoryTypes[i].propertyFlags & reqs) == reqs) {
@@ -122,19 +114,19 @@ public:
}
inline VkPhysicalDeviceLimits const& getPhysicalDeviceLimits() const noexcept {
return mPhysicalDeviceProperties.properties.limits;
return mPhysicalDeviceProperties.limits;
}
inline uint32_t getPhysicalDeviceVendorId() const noexcept {
return mPhysicalDeviceProperties.properties.vendorID;
return mPhysicalDeviceProperties.vendorID;
}
inline bool isImageCubeArraySupported() const noexcept {
return mPhysicalDeviceFeatures.features.imageCubeArray == VK_TRUE;
return mPhysicalDeviceFeatures.imageCubeArray == VK_TRUE;
}
inline bool isDepthClampSupported() const noexcept {
return mPhysicalDeviceFeatures.features.depthClamp == VK_TRUE;
return mPhysicalDeviceFeatures.depthClamp == VK_TRUE;
}
inline bool isDebugMarkersSupported() const noexcept {
@@ -150,30 +142,16 @@ public:
}
inline bool isClipDistanceSupported() const noexcept {
return mPhysicalDeviceFeatures.features.shaderClipDistance == VK_TRUE;
}
inline bool isLazilyAllocatedMemorySupported() const noexcept {
return mLazilyAllocatedMemorySupported;
}
inline bool isProtectedMemorySupported() const noexcept {
return mProtectedMemorySupported;
return mPhysicalDeviceFeatures.shaderClipDistance == VK_TRUE;
}
private:
VkPhysicalDeviceMemoryProperties mMemoryProperties = {};
VkPhysicalDeviceProperties2 mPhysicalDeviceProperties = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
};
VkPhysicalDeviceFeatures2 mPhysicalDeviceFeatures = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2
};
VkPhysicalDeviceProperties mPhysicalDeviceProperties = {};
VkPhysicalDeviceFeatures mPhysicalDeviceFeatures = {};
bool mDebugMarkersSupported = false;
bool mDebugUtilsSupported = false;
bool mMultiviewEnabled = false;
bool mLazilyAllocatedMemorySupported = false;
bool mProtectedMemorySupported = false;
VkFormatList mDepthStencilFormats;
VkFormatList mBlittableDepthStencilFormats;

File diff suppressed because it is too large Load Diff

View File

@@ -24,14 +24,13 @@
#include "VulkanHandles.h"
#include "VulkanPipelineCache.h"
#include "VulkanReadPixels.h"
#include "VulkanResourceAllocator.h"
#include "VulkanSamplerCache.h"
#include "VulkanStagePool.h"
#include "VulkanUtility.h"
#include "backend/DriverEnums.h"
#include "caching/VulkanDescriptorSetManager.h"
#include "caching/VulkanPipelineLayoutCache.h"
#include "memory/ResourceManager.h"
#include "memory/ResourcePointer.h"
#include "DriverBase.h"
#include "private/backend/Driver.h"
@@ -48,6 +47,21 @@ struct VulkanSamplerGroup;
constexpr uint8_t MAX_RENDERTARGET_ATTACHMENT_TEXTURES =
MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT * 2 + 1;
// We need to store information about a render pass to enable better barriers at the end of a
// renderpass.
struct RenderPassFboBundle {
    // Container type for the attachments of the render pass being tracked; CappedArray is
    // bounded by MAX_RENDERTARGET_ATTACHMENT_TEXTURES entries.
    using AttachmentArray = CappedArray<VulkanAttachment, MAX_RENDERTARGET_ATTACHMENT_TEXTURES>;

    // Attachments recorded for the render pass.
    AttachmentArray attachments;

    // Whether the render pass has a color resolve; false until set by the owner.
    bool hasColorResolve = false;

    // Returns the bundle to its initial state: no attachments and no color resolve.
    void clear() {
        hasColorResolve = false;
        attachments.clear();
    }
};
class VulkanDriver final : public DriverBase {
public:
static Driver* create(VulkanPlatform* platform, VulkanContext const& context,
@@ -78,9 +92,6 @@ public:
#endif // FVK_ENABLED(FVK_DEBUG_DEBUG_UTILS)
private:
template<typename D>
using resource_ptr = fvkmemory::resource_ptr<D>;
static constexpr uint8_t MAX_SAMPLER_BINDING_COUNT = Program::SAMPLER_BINDING_COUNT;
void debugCommandBegin(CommandStream* cmds, bool synchronous,
@@ -117,16 +128,25 @@ private:
void collectGarbage();
VulkanPlatform* mPlatform = nullptr;
fvkmemory::ResourceManager mResourceManager;
std::unique_ptr<VulkanTimestamps> mTimestamps;
resource_ptr<VulkanSwapChain> mCurrentSwapChain;
resource_ptr<VulkanRenderTarget> mDefaultRenderTarget;
// Placeholder resources
VulkanTexture* mEmptyTexture;
VulkanBufferObject* mEmptyBufferObject;
VulkanSwapChain* mCurrentSwapChain = nullptr;
VulkanRenderTarget* mDefaultRenderTarget = nullptr;
VulkanRenderPass mCurrentRenderPass = {};
VmaAllocator mAllocator = VK_NULL_HANDLE;
VkDebugReportCallbackEXT mDebugCallback = VK_NULL_HANDLE;
VulkanContext mContext = {};
VulkanResourceAllocator mResourceAllocator;
VulkanResourceManager mResourceManager;
// Used for resources that are created synchronously and used and destroyed on the backend
// thread.
VulkanThreadSafeResourceManager mThreadSafeResourceManager;
VulkanCommands mCommands;
VulkanPipelineLayoutCache mPipelineLayoutCache;
@@ -139,19 +159,15 @@ private:
VulkanReadPixels mReadPixels;
VulkanDescriptorSetManager mDescriptorSetManager;
// This is necessary for us to write to push constants after binding a pipeline.
struct {
resource_ptr<VulkanProgram> program;
VkPipelineLayout pipelineLayout;
DescriptorSetMask descriptorSetMask;
} mBoundPipeline = {};
VulkanDescriptorSetManager::GetPipelineLayoutFunction mGetPipelineFunction;
// We need to store information about a render pass to enable better barriers at the end of a
// renderpass.
struct {
using AttachmentArray = CappedArray<VulkanAttachment, MAX_RENDERTARGET_ATTACHMENT_TEXTURES>;
AttachmentArray attachments;
} mRenderPassFboInfo = {};
// This is necessary for us to write to push constants after binding a pipeline.
struct BoundPipeline {
VulkanProgram* program;
VkPipelineLayout pipelineLayout;
};
BoundPipeline mBoundPipeline = {};
RenderPassFboBundle mRenderPassFboInfo;
bool const mIsSRGBSwapChainSupported;
backend::StereoscopicType const mStereoscopicType;

View File

@@ -31,6 +31,7 @@ namespace filament::backend {
bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1,
const RenderPassKey& k2) const {
if (k1.initialColorLayoutMask != k2.initialColorLayoutMask) return false;
if (k1.initialDepthLayout != k2.initialDepthLayout) return false;
for (int i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) {
if (k1.colorFormat[i] != k2.colorFormat[i]) return false;
@@ -41,7 +42,6 @@ bool VulkanFboCache::RenderPassEq::operator()(const RenderPassKey& k1,
if (k1.discardEnd != k2.discardEnd) return false;
if (k1.samples != k2.samples) return false;
if (k1.needsResolveMask != k2.needsResolveMask) return false;
if (k1.usesLazilyAllocatedMemory != k2.usesLazilyAllocatedMemory) return false;
if (k1.subpassMask != k2.subpassMask) return false;
if (k1.viewCount != k2.viewCount) return false;
return true;
@@ -69,8 +69,8 @@ VulkanFboCache::~VulkanFboCache() {
<< "Please explicitly call terminate() while the VkDevice is still alive.";
}
VkFramebuffer VulkanFboCache::getFramebuffer(FboKey const& config) noexcept {
FboMap::iterator iter = mFramebufferCache.find(config);
VkFramebuffer VulkanFboCache::getFramebuffer(FboKey config) noexcept {
auto iter = mFramebufferCache.find(config);
if (UTILS_LIKELY(iter != mFramebufferCache.end() && iter->second.handle != VK_NULL_HANDLE)) {
iter.value().timestamp = mCurrentTime;
return iter->second.handle;
@@ -121,7 +121,7 @@ VkFramebuffer VulkanFboCache::getFramebuffer(FboKey const& config) noexcept {
return framebuffer;
}
VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey const& config) noexcept {
VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey config) noexcept {
auto iter = mRenderPassCache.find(config);
if (UTILS_LIKELY(iter != mRenderPassCache.end() && iter->second.handle != VK_NULL_HANDLE)) {
iter.value().timestamp = mCurrentTime;
@@ -255,10 +255,12 @@ VkRenderPass VulkanFboCache::getRenderPass(RenderPassKey const& config) noexcept
.format = config.colorFormat[i],
.samples = (VkSampleCountFlagBits) config.samples,
.loadOp = clear ? kClear : (discard ? kDontCare : kKeep),
.storeOp = (config.usesLazilyAllocatedMemory & (1 << i)) ? kDisableStore : kEnableStore,
.storeOp = kEnableStore,
.stencilLoadOp = kDontCare,
.stencilStoreOp = kDisableStore,
.initialLayout = imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT),
.initialLayout = ((!discard && config.initialColorLayoutMask & (1 << i)) || clear)
? imgutil::getVkLayout(VulkanLayout::COLOR_ATTACHMENT)
: imgutil::getVkLayout(VulkanLayout::UNDEFINED),
.finalLayout = imgutil::getVkLayout(FINAL_COLOR_ATTACHMENT_LAYOUT),
};
}
@@ -362,7 +364,7 @@ void VulkanFboCache::gc() noexcept {
}
const uint32_t evictTime = mCurrentTime - TIME_BEFORE_EVICTION;
for (FboMap::iterator iter = mFramebufferCache.begin(); iter != mFramebufferCache.end(); ++iter) {
for (auto iter = mFramebufferCache.begin(); iter != mFramebufferCache.end(); ++iter) {
const FboVal fbo = iter->second;
if (fbo.timestamp < evictTime && fbo.handle) {
mRenderPassRefCount[iter->first.renderPass]--;

View File

@@ -42,25 +42,36 @@ public:
// RenderPassKey is a small POD representing the immutable state that is used to construct
// a VkRenderPass. It is hashed and used as a lookup key.
struct alignas(8) RenderPassKey {
// For each target, we need to know three image layouts: the layout BEFORE the pass, the
// layout DURING the pass, and the layout AFTER the pass. Here are the rules:
// - For depth, we explicitly specify all three layouts.
// - Color targets have their initial image layout specified with a bitmask.
// - For each color target, the pre-existing layout is either UNDEFINED (0) or GENERAL (1).
// - The render-pass layout and the final image layout for color buffers are always
// VulkanLayout::COLOR_ATTACHMENT.
uint8_t initialColorLayoutMask;
// Note that if VulkanLayout grows beyond 16, we'd need to up this.
VulkanLayout initialDepthLayout : 8;
uint8_t padding0;
uint8_t padding1;
VkFormat colorFormat[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT]; // 32 bytes
VkFormat depthFormat; // 4 bytes
TargetBufferFlags clear; // 4 bytes
TargetBufferFlags discardStart; // 4 bytes
TargetBufferFlags discardEnd; // 4 bytes
VulkanLayout initialDepthLayout; // 1 byte
uint8_t samples; // 1 byte
uint8_t needsResolveMask; // 1 byte
uint8_t usesLazilyAllocatedMemory; // 1 byte
uint8_t subpassMask; // 1 byte
uint8_t viewCount; // 1 byte
uint8_t padding[2];
};
// Value type of the render pass cache: a render pass handle plus the time it was last used.
struct RenderPassVal {
// The cached VkRenderPass handle.
VkRenderPass handle;
// Cache time (mCurrentTime) at which getRenderPass() last returned this entry.
// NOTE(review): presumably consulted by gc() when evicting stale render passes - confirm.
uint32_t timestamp;
};
static_assert(0 == MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT % 8);
static_assert(sizeof(RenderPassKey::initialColorLayoutMask) == MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT / 8);
static_assert(sizeof(TargetBufferFlags) == 4, "TargetBufferFlags has unexpected size.");
static_assert(sizeof(VkFormat) == 4, "VkFormat has unexpected size.");
static_assert(sizeof(RenderPassKey) == 56, "RenderPassKey has unexpected size.");
@@ -98,10 +109,10 @@ public:
~VulkanFboCache();
// Retrieves or creates a VkFramebuffer handle.
VkFramebuffer getFramebuffer(FboKey const& config) noexcept;
VkFramebuffer getFramebuffer(FboKey config) noexcept;
// Retrieves or creates a VkRenderPass handle.
VkRenderPass getRenderPass(RenderPassKey const& config) noexcept;
VkRenderPass getRenderPass(RenderPassKey config) noexcept;
// Evicts old unused Vulkan objects. Call this once per frame.
void gc() noexcept;
@@ -111,9 +122,7 @@ public:
private:
VkDevice mDevice;
using FboMap = tsl::robin_map<FboKey, FboVal, FboKeyHashFn, FboKeyEqualFn>;
FboMap mFramebufferCache;
tsl::robin_map<FboKey, FboVal, FboKeyHashFn, FboKeyEqualFn> mFramebufferCache;
tsl::robin_map<RenderPassKey, RenderPassVal, RenderPassHash, RenderPassEq> mRenderPassCache;
tsl::robin_map<VkRenderPass, uint32_t> mRenderPassRefCount;
uint32_t mCurrentTime = 0;

View File

@@ -16,15 +16,13 @@
#include "VulkanHandles.h"
#include "VulkanConstants.h"
// TODO: remove this by moving DebugUtils out of VulkanDriver
#include "VulkanDriver.h"
#include "VulkanConstants.h"
#include "VulkanDriver.h"
#include "VulkanMemory.h"
#include "VulkanUtility.h"
#include "vulkan/memory/ResourcePointer.h"
#include "spirv/VulkanSpirvUtils.h"
#include "utils/Log.h"
#include <backend/platforms/VulkanPlatform.h>
@@ -36,12 +34,15 @@ namespace filament::backend {
namespace {
// Mirrors the rectangle vertically, in place, within a framebuffer of the given height: the
// new y offset is the framebuffer height minus the rectangle's (y + height) edge.
void flipVertically(VkRect2D* rect, uint32_t framebufferHeight) {
    // The sum is evaluated in unsigned 32-bit arithmetic (int32_t + uint32_t), exactly as the
    // operands of a single chained subtraction would be, so the result wraps identically.
    uint32_t const farEdge = rect->offset.y + rect->extent.height;
    rect->offset.y = framebufferHeight - farEdge;
}
// Mirrors the viewport vertically, in place, within a framebuffer of the given height: the new
// y origin is framebufferHeight - y - height. Only the y field is modified.
void flipVertically(VkViewport* rect, uint32_t framebufferHeight) {
rect->y = framebufferHeight - rect->y - rect->height;
}
void clampToFramebuffer(VkRect2D* rect, uint32_t fbWidth, uint32_t fbHeight) {
rect->offset.y = fbHeight - rect->offset.y - rect->extent.height;
int32_t x = std::max(rect->offset.x, 0);
int32_t y = std::max(rect->offset.y, 0);
int32_t right = std::min(rect->offset.x + (int32_t) rect->extent.width, (int32_t) fbWidth);
@@ -53,14 +54,62 @@ void clampToFramebuffer(VkRect2D* rect, uint32_t fbWidth, uint32_t fbHeight) {
}
template<typename Bitmask>
inline void fromStageFlags(backend::ShaderStageFlags stage, descriptor_binding_t binding,
Bitmask& mask) {
if ((bool) (stage & ShaderStageFlags::VERTEX)) {
mask.set(binding + getVertexStageShift<Bitmask>());
static constexpr Bitmask fromStageFlags(ShaderStageFlags2 flags, uint8_t binding) {
Bitmask ret = 0;
if (flags & ShaderStageFlags2::VERTEX) {
ret |= (getVertexStage<Bitmask>() << binding);
}
if ((bool) (stage & ShaderStageFlags::FRAGMENT)) {
mask.set(binding + getFragmentStageShift<Bitmask>());
if (flags & ShaderStageFlags2::FRAGMENT) {
ret |= (getFragmentStage<Bitmask>() << binding);
}
return ret;
}
constexpr decltype(VulkanProgram::MAX_SHADER_MODULES) MAX_SHADER_MODULES =
VulkanProgram::MAX_SHADER_MODULES;
using LayoutDescriptionList = VulkanProgram::LayoutDescriptionList;
template<typename Bitmask>
void addDescriptors(Bitmask mask,
utils::FixedCapacityVector<DescriptorSetLayoutBinding>& outputList) {
constexpr uint8_t MODULE_OFFSET = (sizeof(Bitmask) * 8) / MAX_SHADER_MODULES;
for (uint8_t i = 0; i < MODULE_OFFSET; ++i) {
bool const hasVertex = (mask & (1ULL << i)) != 0;
bool const hasFragment = (mask & (1ULL << (MODULE_OFFSET + i))) != 0;
if (!hasVertex && !hasFragment) {
continue;
}
DescriptorSetLayoutBinding binding{
.binding = i,
.flags = DescriptorFlags::NONE,
.count = 0,// This is always 0 for now as we pass the size of the UBOs in the Driver API
// instead.
};
if (hasVertex) {
binding.stageFlags = ShaderStageFlags2::VERTEX;
}
if (hasFragment) {
binding.stageFlags = static_cast<ShaderStageFlags2>(
binding.stageFlags | ShaderStageFlags2::FRAGMENT);
}
if constexpr (std::is_same_v<Bitmask, UniformBufferBitmask>) {
binding.type = DescriptorType::UNIFORM_BUFFER;
} else if constexpr (std::is_same_v<Bitmask, SamplerBitmask>) {
binding.type = DescriptorType::SAMPLER;
} else if constexpr (std::is_same_v<Bitmask, InputAttachmentBitmask>) {
binding.type = DescriptorType::INPUT_ATTACHMENT;
}
outputList.push_back(binding);
}
}
// Creates a VkDescriptorSetLayout on the given device from the provided create info.
//
// Returns the new layout handle. The handle starts as VK_NULL_HANDLE so that a failed creation
// never hands an indeterminate value to the caller, and the VkResult is checked in debug builds
// (vkCreateDescriptorSetLayout can fail, e.g. when out of host or device memory).
inline VkDescriptorSetLayout createDescriptorSetLayout(VkDevice device,
        VkDescriptorSetLayoutCreateInfo const& info) {
    VkDescriptorSetLayout layout = VK_NULL_HANDLE;
    UTILS_UNUSED_IN_RELEASE VkResult const result =
            vkCreateDescriptorSetLayout(device, &info, VKALLOC, &layout);
    assert_invariant(result == VK_SUCCESS);
    return layout;
}
inline VkShaderStageFlags getVkStage(backend::ShaderStage stage) {
@@ -74,72 +123,22 @@ inline VkShaderStageFlags getVkStage(backend::ShaderStage stage) {
}
}
using BitmaskGroup = VulkanDescriptorSetLayout::Bitmask;
// Translates a backend DescriptorSetLayout into a BitmaskGroup. Every binding in the layout is
// recorded, via fromStageFlags(), in the bitmask that matches its descriptor type:
//   - UNIFORM_BUFFER            -> dynamicUbo when DYNAMIC_OFFSET is set, ubo otherwise
//   - SAMPLER, SAMPLER_EXTERNAL -> sampler
//   - INPUT_ATTACHMENT          -> inputAttachment
// SHADER_STORAGE_BUFFER bindings are not supported and trigger a postcondition panic.
BitmaskGroup fromBackendLayout(DescriptorSetLayout const& layout) {
    BitmaskGroup result;
    for (auto const& entry : layout.bindings) {
        switch (entry.type) {
            case DescriptorType::UNIFORM_BUFFER: {
                bool const isDynamic =
                        (entry.flags & DescriptorFlags::DYNAMIC_OFFSET) != DescriptorFlags::NONE;
                if (isDynamic) {
                    fromStageFlags(entry.stageFlags, entry.binding, result.dynamicUbo);
                } else {
                    fromStageFlags(entry.stageFlags, entry.binding, result.ubo);
                }
                break;
            }
            // TODO: properly handle external sampler
            case DescriptorType::SAMPLER_EXTERNAL:
            case DescriptorType::SAMPLER:
                fromStageFlags(entry.stageFlags, entry.binding, result.sampler);
                break;
            case DescriptorType::INPUT_ATTACHMENT:
                fromStageFlags(entry.stageFlags, entry.binding, result.inputAttachment);
                break;
            case DescriptorType::SHADER_STORAGE_BUFFER:
                PANIC_POSTCONDITION("Shader storage is not supported");
                break;
        }
    }
    return result;
}
// Returns the multisampled "sidecar" texture associated with `texture`, creating it on first
// use and attaching it to `texture` through setSidecar().
//
// A newly created sidecar copies the target, format and dimensions of `texture`, but uses the
// `levels` and `samples` passed in, and keeps only the attachment-related usage bits.
fvkmemory::resource_ptr<VulkanTexture> initMsaaTexture(
        fvkmemory::resource_ptr<VulkanTexture> texture, VkDevice device,
        VkPhysicalDevice physicalDevice, VulkanContext const& context, VmaAllocator allocator,
        VulkanCommands* commands, fvkmemory::ResourceManager* resManager, uint8_t levels,
        uint8_t samples, VulkanStagePool& stagePool) {
    assert_invariant(texture);

    auto sidecar = texture->getSidecar();
    if (UTILS_UNLIKELY(!sidecar)) {
        // Strip every usage flag that is unrelated to attachments, so that the transient usage
        // flag can be used for the sidecar.
        TextureUsage const attachmentUsage = texture->usage & TextureUsage::ALL_ATTACHMENTS;
        assert_invariant(static_cast<uint16_t>(attachmentUsage) != 0U);

        sidecar = resource_ptr<VulkanTexture>::construct(resManager, device, physicalDevice,
                context, allocator, resManager, commands, texture->target, levels,
                texture->format, samples, texture->width, texture->height, texture->depth,
                attachmentUsage, stagePool);
        texture->setSidecar(sidecar);
    }
    return sidecar;
}
} // anonymous namespace
// Appends the given texture to this descriptor set's mResources list.
// NOTE(review): presumably this keeps the texture alive for as long as the set references it -
// confirm against the resource_ptr ownership semantics.
void VulkanDescriptorSet::acquire(fvkmemory::resource_ptr<VulkanTexture> texture) {
mResources.push_back(texture);
}
// Appends the given buffer object to this descriptor set's mResources list.
// NOTE(review): presumably this keeps the buffer object alive for as long as the set references
// it - confirm against the resource_ptr ownership semantics.
void VulkanDescriptorSet::acquire(fvkmemory::resource_ptr<VulkanBufferObject> obj) {
mResources.push_back(obj);
}
VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(DescriptorSetLayout const& layout)
: bitmask(fromBackendLayout(layout)),
VulkanDescriptorSetLayout::VulkanDescriptorSetLayout(VkDevice device,
VkDescriptorSetLayoutCreateInfo const& info, Bitmask const& bitmask)
: VulkanResource(VulkanResourceType::DESCRIPTOR_SET_LAYOUT),
mDevice(device),
vklayout(createDescriptorSetLayout(device, info)),
bitmask(bitmask),
bindings(getBindings(bitmask)),
count(Count::fromLayoutBitmask(bitmask)) {}
// Destroys the Vulkan descriptor set layout handle (vklayout) held by this object, using the
// device stored in mDevice.
VulkanDescriptorSetLayout::~VulkanDescriptorSetLayout() {
vkDestroyDescriptorSetLayout(mDevice, vklayout, VKALLOC);
}
PushConstantDescription::PushConstantDescription(backend::Program const& program) noexcept {
mRangeCount = 0;
for (auto stage : { ShaderStage::VERTEX, ShaderStage::FRAGMENT, ShaderStage::COMPUTE }) {
@@ -163,9 +162,9 @@ PushConstantDescription::PushConstantDescription(backend::Program const& program
}
}
void PushConstantDescription::write(VkCommandBuffer cmdbuf, VkPipelineLayout layout,
void PushConstantDescription::write(VulkanCommands* commands, VkPipelineLayout layout,
backend::ShaderStage stage, uint8_t index, backend::PushConstantVariant const& value) {
VulkanCommandBuffer* cmdbuf = &(commands->get());
uint32_t binaryValue = 0;
UTILS_UNUSED_IN_RELEASE auto const& types = mTypes[(uint8_t) stage];
if (std::holds_alternative<bool>(value)) {
@@ -181,20 +180,34 @@ void PushConstantDescription::write(VkCommandBuffer cmdbuf, VkPipelineLayout lay
int const ival = std::get<int>(value);
binaryValue = *reinterpret_cast<uint32_t const*>(&ival);
}
vkCmdPushConstants(cmdbuf, layout, getVkStage(stage), index * ENTRY_SIZE, ENTRY_SIZE,
vkCmdPushConstants(cmdbuf->buffer(), layout, getVkStage(stage), index * ENTRY_SIZE, ENTRY_SIZE,
&binaryValue);
}
VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
: HwProgram(builder.getName()),
VulkanResource(VulkanResourceType::PROGRAM),
mInfo(new(std::nothrow) PipelineInfo(builder)),
mDevice(device) {
constexpr uint8_t UBO_MODULE_OFFSET = (sizeof(UniformBufferBitmask) * 8) / MAX_SHADER_MODULES;
constexpr uint8_t SAMPLER_MODULE_OFFSET = (sizeof(SamplerBitmask) * 8) / MAX_SHADER_MODULES;
constexpr uint8_t INPUT_ATTACHMENT_MODULE_OFFSET =
(sizeof(InputAttachmentBitmask) * 8) / MAX_SHADER_MODULES;
Program::ShaderSource const& blobs = builder.getShadersSource();
auto& modules = mInfo->shaders;
auto const& specializationConstants = builder.getSpecializationConstants();
std::vector<uint32_t> shader;
// TODO: this will be moved out of the shader as the descriptor set layout will be provided by
// Filament instead of parsed from the shaders. See [GDSR] in VulkanDescriptorSetManager.h
UniformBufferBitmask uboMask = 0;
SamplerBitmask samplerMask = 0;
InputAttachmentBitmask inputAttachmentMask = 0;
static_assert(static_cast<ShaderStage>(0) == ShaderStage::VERTEX &&
static_cast<ShaderStage>(1) == ShaderStage::FRAGMENT &&
MAX_SHADER_MODULES == 2);
@@ -211,6 +224,12 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
dataSize = shader.size() * 4;
}
auto const [ubo, sampler, inputAttachment] = getProgramBindings(blob);
uboMask |= (static_cast<UniformBufferBitmask>(ubo) << (UBO_MODULE_OFFSET * i));
samplerMask |= (static_cast<SamplerBitmask>(sampler) << (SAMPLER_MODULE_OFFSET * i));
inputAttachmentMask |= (static_cast<InputAttachmentBitmask>(inputAttachment)
<< (INPUT_ATTACHMENT_MODULE_OFFSET * i));
VkShaderModule& module = modules[i];
VkShaderModuleCreateInfo moduleInfo = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
@@ -238,6 +257,40 @@ VulkanProgram::VulkanProgram(VkDevice device, Program const& builder) noexcept
#endif
}
LayoutDescriptionList& layouts = mInfo->layouts;
layouts[0].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
countBits(collapseStages(uboMask)));
layouts[1].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
countBits(collapseStages(samplerMask)));
layouts[2].bindings = utils::FixedCapacityVector<DescriptorSetLayoutBinding>::with_capacity(
countBits(collapseStages(inputAttachmentMask)));
addDescriptors(uboMask, layouts[0].bindings);
addDescriptors(samplerMask, layouts[1].bindings);
addDescriptors(inputAttachmentMask, layouts[2].bindings);
#if FVK_ENABLED_DEBUG_SAMPLER_NAME
auto& bindingToName = mInfo->bindingToName;
#endif
auto& groupInfo = builder.getSamplerGroupInfo();
auto& bindingToSamplerIndex = mInfo->bindingToSamplerIndex;
auto& bindings = mInfo->bindings;
for (uint8_t groupInd = 0; groupInd < Program::SAMPLER_BINDING_COUNT; groupInd++) {
auto const& group = groupInfo[groupInd];
auto const& samplers = group.samplers;
for (size_t i = 0; i < samplers.size(); ++i) {
uint32_t const binding = samplers[i].binding;
bindingToSamplerIndex[binding] = (groupInd << 8) | (0xff & i);
assert_invariant(bindings.find(binding) == bindings.end());
bindings.insert(binding);
#if FVK_ENABLED_DEBUG_SAMPLER_NAME
bindingToName[binding] = samplers[i].name.c_str();
#endif
}
}
#if FVK_ENABLED(FVK_DEBUG_SHADER_MODULE)
FVK_LOGD << "Created VulkanProgram " << builder << ", shaders = (" << modules[0]
<< ", " << modules[1] << ")" << utils::io::endl;
@@ -252,239 +305,150 @@ VulkanProgram::~VulkanProgram() {
}
// Creates a special "default" render target (i.e. associated with the swap chain)
VulkanRenderTarget::VulkanRenderTarget()
: HwRenderTarget(0, 0),
mOffscreen(false),
mProtected(false),
mInfo(std::make_unique<Auxiliary>()) {
mInfo->rpkey.samples = mInfo->fbkey.samples = 1;
}
VulkanRenderTarget::VulkanRenderTarget() :
HwRenderTarget(0, 0),
VulkanResource(VulkanResourceType::RENDER_TARGET),
mOffscreen(false), mSamples(1) {}
VulkanRenderTarget::~VulkanRenderTarget() = default;
void VulkanRenderTarget::bindToSwapChain(fvkmemory::resource_ptr<VulkanSwapChain> swapchain) {
void VulkanRenderTarget::bindToSwapChain(VulkanSwapChain& swapChain) {
assert_invariant(!mOffscreen);
VkExtent2D const extent = swapchain->getExtent();
VkExtent2D const extent = swapChain.getExtent();
mColor[0] = { .texture = swapChain.getCurrentColor() };
mDepth = { .texture = swapChain.getDepth() };
width = extent.width;
height = extent.height;
mProtected = swapchain->isProtected();
VulkanAttachment color = {};
color.texture = swapchain->getCurrentColor();
mInfo->attachments = {color};
auto& fbkey = mInfo->fbkey;
auto& rpkey = mInfo->rpkey;
rpkey.colorFormat[0] = color.getFormat();
fbkey.width = width;
fbkey.height = height;
fbkey.color[0] = color.getImageView();
fbkey.resolve[0] = VK_NULL_HANDLE;
if (swapchain->getDepth()) {
VulkanAttachment depth = {};
depth.texture = swapchain->getDepth();
mInfo->attachments.push_back(depth);
mInfo->depthIndex = 1;
rpkey.depthFormat = depth.getFormat();
fbkey.depth = depth.getImageView();
} else {
rpkey.depthFormat = VK_FORMAT_UNDEFINED;
fbkey.depth = VK_NULL_HANDLE;
}
mInfo->colors.set(0);
}
VulkanRenderTarget::VulkanRenderTarget(VkDevice device, VkPhysicalDevice physicalDevice,
VulkanContext const& context, fvkmemory::ResourceManager* resourceManager,
VmaAllocator allocator, VulkanCommands* commands, uint32_t width, uint32_t height,
uint8_t samples, VulkanAttachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT],
VulkanContext const& context, VmaAllocator allocator, VulkanCommands* commands,
uint32_t width, uint32_t height, uint8_t samples,
VulkanAttachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT],
VulkanAttachment depthStencil[2], VulkanStagePool& stagePool, uint8_t layerCount)
: HwRenderTarget(width, height),
VulkanResource(VulkanResourceType::RENDER_TARGET),
mOffscreen(true),
mProtected(false),
mInfo(std::make_unique<Auxiliary>()) {
auto& depth = depthStencil[0];
mSamples(samples),
mLayerCount(layerCount) {
for (int index = 0; index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; index++) {
mColor[index] = color[index];
}
mDepth = depthStencil[0];
VulkanTexture* depthTexture = (VulkanTexture*) mDepth.texture;
if (samples == 1) {
return;
}
// Constrain the sample count according to both kinds of sample count masks obtained from
// VkPhysicalDeviceProperties. This is consistent with the VulkanTexture constructor.
auto const& limits = context.getPhysicalDeviceLimits();
samples = reduceSampleCount(samples, limits.framebufferDepthSampleCounts &
mSamples = samples = reduceSampleCount(samples, limits.framebufferDepthSampleCounts &
limits.framebufferColorSampleCounts);
auto& rpkey = mInfo->rpkey;
rpkey.samples = samples;
rpkey.depthFormat = depth.getFormat();
rpkey.viewCount = layerCount;
auto& fbkey = mInfo->fbkey;
fbkey.width = width;
fbkey.height = height;
fbkey.samples = samples;
std::vector<VulkanAttachment>& attachments = mInfo->attachments;
std::vector<VulkanAttachment> msaa;
// Create sidecar MSAA textures for color attachments if they don't already exist.
for (int index = 0; index < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; index++) {
VulkanAttachment& attachment = color[index];
auto texture = attachment.texture;
if (!texture) {
rpkey.colorFormat[index] = VK_FORMAT_UNDEFINED;
continue;
}
mProtected |= texture->getIsProtected();
attachments.push_back(attachment);
mInfo->colors.set(index);
rpkey.colorFormat[index] = attachment.getFormat();
fbkey.color[index] = attachment.getImageView();
fbkey.resolve[index] = VK_NULL_HANDLE;
if (samples > 1) {
VulkanAttachment msaaAttachment = {};
if (texture->samples == 1) {
auto msaaTexture = initMsaaTexture(texture, device, physicalDevice, context,
allocator, commands, resourceManager, texture->levels, samples, stagePool);
if (msaaTexture && msaaTexture->isTransientAttachment()) {
rpkey.usesLazilyAllocatedMemory |= (1 << index);
}
if (attachment.texture->samples == 1) {
rpkey.needsResolveMask |= (1 << index);
}
msaaAttachment = {
.texture = msaaTexture,
.layerCount = layerCount,
};
fbkey.resolve[index] = attachment.getImageView();
} else {
msaaAttachment = {
.texture = texture,
.layerCount = layerCount,
};
VulkanAttachment const& spec = color[index];
VulkanTexture* texture = (VulkanTexture*) spec.texture;
if (texture && texture->samples == 1) {
auto msTexture = texture->getSidecar();
if (UTILS_UNLIKELY(!msTexture)) {
// TODO: This should be allocated with the ResourceAllocator.
msTexture = new VulkanTexture(device, physicalDevice, context, allocator, commands,
texture->target, ((VulkanTexture const*) texture)->levels, texture->format,
samples, texture->width, texture->height, texture->depth, texture->usage,
stagePool, true /* heap allocated */);
texture->setSidecar(msTexture);
}
fbkey.color[index] = msaaAttachment.getImageView();
msaa.push_back(msaaAttachment);
mMsaaAttachments[index] = {.texture = msTexture};
}
if (texture && texture->samples > 1) {
mMsaaAttachments[index] = mColor[index];
}
}
if (attachments.size() > 0 && samples > 1 && msaa.size() > 0) {
mInfo->msaaIndex = (uint8_t) attachments.size();
attachments.insert(attachments.end(), msaa.begin(), msaa.end());
if (!depthTexture) {
return;
}
if (depth.texture) {
auto depthTexture = depth.texture;
mInfo->depthIndex = (uint8_t) attachments.size();
attachments.push_back(depth);
fbkey.depth = depth.getImageView();
if (samples > 1) {
mInfo->msaaDepthIndex = mInfo->depthIndex;
if (depthTexture->samples == 1) {
// MSAA depth texture must have the mipmap count of 1
uint8_t const msLevel = 1;
// Create sidecar MSAA texture for the depth attachment if it does not already
// exist.
auto msaa = initMsaaTexture(depthTexture, device, physicalDevice, context,
allocator, commands, resourceManager, msLevel, samples, stagePool);
mInfo->msaaDepthIndex = (uint8_t) attachments.size();
attachments.push_back({ .texture = msaa, .layerCount = layerCount });
}
}
// There is no need for sidecar depth if the depth texture is already MSAA.
if (depthTexture->samples > 1) {
mMsaaDepthAttachment = mDepth;
return;
}
// MSAA depth texture must have the mipmap count of 1
uint8_t const msLevel = 1;
// Create sidecar MSAA texture for the depth attachment if it does not already exist.
VulkanTexture* msTexture = depthTexture->getSidecar();
if (UTILS_UNLIKELY(!msTexture)) {
msTexture = new VulkanTexture(device, physicalDevice, context, allocator,
commands, depthTexture->target, msLevel, depthTexture->format, samples,
depthTexture->width, depthTexture->height, depthTexture->depth, depthTexture->usage,
stagePool, true /* heap allocated */);
depthTexture->setSidecar(msTexture);
}
mMsaaDepthAttachment = {
.texture = msTexture,
.level = msLevel,
.layer = mDepth.layer,
};
}
void VulkanRenderTarget::transformClientRectToPlatform(VkRect2D* bounds) const {
auto const& extent = getExtent();
const auto& extent = getExtent();
flipVertically(bounds, extent.height);
clampToFramebuffer(bounds, extent.width, extent.height);
}
void VulkanRenderTarget::transformViewportToPlatform(VkViewport* bounds) const {
void VulkanRenderTarget::transformClientRectToPlatform(VkViewport* bounds) const {
flipVertically(bounds, getExtent().height);
}
// Returns the dimensions of this render target (its width and height members) as a VkExtent2D.
VkExtent2D VulkanRenderTarget::getExtent() const {
    return VkExtent2D{
        .width = width,
        .height = height,
    };
}
// Returns a mutable reference to the color attachment stored at index `target`.
// The index is not range-checked here.
VulkanAttachment& VulkanRenderTarget::getColor(int target) {
return mColor[target];
}
// Returns a mutable reference to the MSAA color attachment stored at index `target`.
// The index is not range-checked here.
VulkanAttachment& VulkanRenderTarget::getMsaaColor(int target) {
return mMsaaAttachments[target];
}
// Returns a mutable reference to the depth attachment of this render target.
VulkanAttachment& VulkanRenderTarget::getDepth() {
return mDepth;
}
// Returns a mutable reference to the MSAA depth attachment of this render target.
VulkanAttachment& VulkanRenderTarget::getMsaaDepth() {
return mMsaaDepthAttachment;
}
uint8_t VulkanRenderTarget::getColorTargetCount(const VulkanRenderPass& pass) const {
if (!mOffscreen) {
return 1;
}
if (pass.currentSubpass == 1) {
return mInfo->colors.count();
}
uint8_t count = 0;
mInfo->colors.forEachSetBit([&count, &pass](size_t index) {
if (!(pass.params.subpassMask & (1 << index))) {
for (uint8_t i = 0; i < MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT; i++) {
if (!mColor[i].texture) {
continue;
}
// NOTE: This must be consistent with VkRenderPass construction (see VulkanFboCache).
if (!(pass.params.subpassMask & (1 << i)) || pass.currentSubpass == 1) {
count++;
}
});
}
return count;
}
// Records, into `commands`, the barriers needed to bring this render target's attachments into
// their attachment layouts before a render pass begins. Color attachments (and their MSAA
// counterparts) are moved to COLOR_ATTACHMENT; depth attachments (and the MSAA depth, if any)
// are moved to DEPTH_ATTACHMENT.
void VulkanRenderTarget::emitBarriersBeginRenderPass(VulkanCommandBuffer& commands) {
auto& attachments = mInfo->attachments;
auto samples = mInfo->fbkey.samples;
// Helper: nothing is done when the attachment's tracked layout (looked up at the base mip level
// and base array layer of its subresource range) already equals `layout`. Otherwise a layout
// transition is requested, with a manual barrier as the fallback.
auto barrier = [&commands](VulkanAttachment& attachment, VulkanLayout const layout) {
auto tex = attachment.texture;
auto const& range = attachment.getSubresourceRange();
if (tex->getLayout(range.baseMipLevel, range.baseArrayLayer) != layout &&
!tex->transitionLayout(&commands, range, layout)) {
// If the layout transition did not emit a barrier, we do it manually here.
tex->samplerToAttachmentBarrier(&commands, range);
}
};
// The first colors.count() entries of the attachment list are treated as the color attachments.
for (size_t i = 0, count = mInfo->colors.count(); i < count; ++i) {
auto& attachment = attachments[i];
auto tex = attachment.texture;
// Only handled here when the render target or the texture itself is single-sampled.
if (samples == 1 || tex->samples == 1) {
barrier(attachment, VulkanLayout::COLOR_ATTACHMENT);
}
}
// MSAA color attachments, when present, occupy colors.count() entries starting at msaaIndex.
if (mInfo->msaaIndex != Auxiliary::UNDEFINED_INDEX) {
for (size_t i = mInfo->msaaIndex, count = mInfo->msaaIndex + mInfo->colors.count();
i < count; ++i) {
barrier(attachments[i], VulkanLayout::COLOR_ATTACHMENT);
}
}
// Depth attachment, if any.
if (mInfo->depthIndex != Auxiliary::UNDEFINED_INDEX) {
barrier(attachments[mInfo->depthIndex], VulkanLayout::DEPTH_ATTACHMENT);
}
// MSAA depth attachment, if any.
if (mInfo->msaaDepthIndex != Auxiliary::UNDEFINED_INDEX) {
barrier(attachments[mInfo->msaaDepthIndex], VulkanLayout::DEPTH_ATTACHMENT);
}
}
// Records, into `commands`, the barriers needed after a render pass ends so that the attachments
// can be read again: depth attachments are moved to DEPTH_SAMPLER and all other attachments to
// READ_WRITE. Nothing is done for swap chain render targets.
void VulkanRenderTarget::emitBarriersEndRenderPass(VulkanCommandBuffer& commands) {
if (isSwapChain()) {
return;
}
for (auto& attachment: mInfo->attachments) {
auto const& range = attachment.getSubresourceRange();
bool const isDepth = attachment.isDepth();
auto texture = attachment.texture;
if (isDepth) {
// NOTE(review): setLayout() presumably only updates the tracked layout so that it matches the
// final layout the render pass left the image in - confirm.
texture->setLayout(range, VulkanFboCache::FINAL_DEPTH_ATTACHMENT_LAYOUT);
// Fall back to an explicit barrier when transitionLayout() reports false.
if (!texture->transitionLayout(&commands, range, VulkanLayout::DEPTH_SAMPLER)) {
texture->attachmentToSamplerBarrier(&commands, range);
}
} else {
// Same sequence as the depth branch, using the color final layout and READ_WRITE target layout.
texture->setLayout(range, VulkanFboCache::FINAL_COLOR_ATTACHMENT_LAYOUT);
if (!texture->transitionLayout(&commands, range, VulkanLayout::READ_WRITE)) {
texture->attachmentToSamplerBarrier(&commands, range);
}
}
}
}
VulkanVertexBufferInfo::VulkanVertexBufferInfo(
uint8_t bufferCount, uint8_t attributeCount, AttributeArray const& attributes)
: HwVertexBufferInfo(bufferCount, attributeCount),
VulkanResource(VulkanResourceType::VERTEX_BUFFER_INFO),
mInfo(attributes.size()) {
auto attribDesc = mInfo.mSoa.data<PipelineInfo::ATTRIBUTE_DESCRIPTION>();
auto bufferDesc = mInfo.mSoa.data<PipelineInfo::BUFFER_DESCRIPTION>();
auto offsets = mInfo.mSoa.data<PipelineInfo::OFFSETS>();
@@ -522,15 +486,19 @@ VulkanVertexBufferInfo::VulkanVertexBufferInfo(
}
VulkanVertexBuffer::VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& stagePool,
uint32_t vertexCount, fvkmemory::resource_ptr<VulkanVertexBufferInfo> vbi)
VulkanResourceAllocator* allocator,
uint32_t vertexCount, Handle<HwVertexBufferInfo> vbih)
: HwVertexBuffer(vertexCount),
vbi(vbi),
// TODO: Seems a bit wasteful. can we do better here?
mBuffers(MAX_VERTEX_BUFFER_COUNT) {
VulkanResource(VulkanResourceType::VERTEX_BUFFER),
vbih(vbih),
mBuffers(MAX_VERTEX_BUFFER_COUNT), // TODO: can we do better here?
mResources(allocator) {
}
void VulkanVertexBuffer::setBuffer(fvkmemory::resource_ptr<VulkanBufferObject> bufferObject,
uint32_t index) {
void VulkanVertexBuffer::setBuffer(VulkanResourceAllocator const& allocator,
VulkanBufferObject* bufferObject, uint32_t index) {
VulkanVertexBufferInfo const* const vbi =
const_cast<VulkanResourceAllocator&>(allocator).handle_cast<VulkanVertexBufferInfo*>(vbih);
size_t const count = vbi->getAttributeCount();
VkBuffer* const vkbuffers = getVkBuffers();
int8_t const* const attribToBuffer = vbi->getAttributeToBuffer();
@@ -539,20 +507,87 @@ void VulkanVertexBuffer::setBuffer(fvkmemory::resource_ptr<VulkanBufferObject> b
vkbuffers[attribIndex] = bufferObject->buffer.getGpuBuffer();
}
}
mResources.push_back(bufferObject);
mResources.acquire(bufferObject);
}
// Builds a buffer object of `byteCount` bytes. The usage flags of the underlying buffer are
// derived from `bindingType` through getBufferObjectUsage().
VulkanBufferObject::VulkanBufferObject(
        VmaAllocator allocator,
        VulkanStagePool& stagePool,
        uint32_t byteCount,
        BufferObjectBinding bindingType)
    : HwBufferObject(byteCount),
      VulkanResource(VulkanResourceType::BUFFER_OBJECT),
      buffer(allocator, stagePool, getBufferObjectUsage(bindingType), byteCount),
      bindingType(bindingType) {
}
// Builds a render primitive of type `pt` that holds on to the given vertex and index buffers.
VulkanRenderPrimitive::VulkanRenderPrimitive(
        PrimitiveType pt,
        fvkmemory::resource_ptr<VulkanVertexBuffer> vb,
        fvkmemory::resource_ptr<VulkanIndexBuffer> ib)
    : HwRenderPrimitive{.type = pt},
      vertexBuffer(vb),
      indexBuffer(ib) {
}
// `indices` carries (starting query index, stopping query index), in that order.
VulkanTimerQuery::VulkanTimerQuery(std::tuple<uint32_t, uint32_t> indices)
    : VulkanThreadSafeResource(VulkanResourceType::TIMER_QUERY),
      mStartingQueryIndex(std::get<0>(indices)),
      mStoppingQueryIndex(std::get<1>(indices)) {
}
// Associates this query with the fence that isCompleted() later inspects.
// The assignment is done while holding mFenceMutex, the same mutex isCompleted() takes.
void VulkanTimerQuery::setFence(std::shared_ptr<VulkanCmdFence> fence) noexcept {
    std::unique_lock<utils::Mutex> lock(mFenceMutex);
    // `fence` is a by-value sink parameter: move it into place instead of copying, which avoids
    // an extra atomic reference-count increment/decrement pair.
    mFence = std::move(fence);
}
// Returns true only when a fence has been attached and its status reads VK_SUCCESS.
bool VulkanTimerQuery::isCompleted() noexcept {
    std::unique_lock<utils::Mutex> lock(mFenceMutex);
    // Reading the query value is synchronous and may happen before beginTimerQuery has recorded
    // anything into a command buffer; the validation layer shipped with the Android NDK treats
    // that as an error. Even with AVAILABILITY_BIT set, validation appears to require that the
    // timestamp was written into a command buffer that has already been processed.
    // The fence tells us that the corresponding command buffer has completed, so without a fence
    // (or with a fence that is not yet VK_SUCCESS) the query is reported as not completed.
    return mFence && mFence->status.load(std::memory_order_relaxed) == VK_SUCCESS;
}
// Defaulted out-of-line: no custom teardown, the members are destroyed by their own destructors.
VulkanTimerQuery::~VulkanTimerQuery() = default;
// Builds a render primitive of type `pt` from vertex/index buffer handles. Both handles are
// resolved through `resourceAllocator`, and the resulting objects are acquired by mResources.
VulkanRenderPrimitive::VulkanRenderPrimitive(
        VulkanResourceAllocator* resourceAllocator,
        PrimitiveType pt,
        Handle<HwVertexBuffer> vbh,
        Handle<HwIndexBuffer> ibh)
    : VulkanResource(VulkanResourceType::RENDER_PRIMITIVE),
      mResources(resourceAllocator) {
    type = pt;

    // Resolve the handles to their backend objects.
    VulkanVertexBuffer* const resolvedVertexBuffer =
            resourceAllocator->handle_cast<VulkanVertexBuffer*>(vbh);
    VulkanIndexBuffer* const resolvedIndexBuffer =
            resourceAllocator->handle_cast<VulkanIndexBuffer*>(ibh);
    vertexBuffer = resolvedVertexBuffer;
    indexBuffer = resolvedIndexBuffer;

    // Register both buffers with this primitive's resource manager.
    mResources.acquire(resolvedVertexBuffer);
    mResources.acquire(resolvedIndexBuffer);
}
using Bitmask = VulkanDescriptorSetLayout::Bitmask;
// Converts a backend descriptor-set layout description into its bitmask form. Every binding
// contributes fromStageFlags(stageFlags, binding) to the mask that matches its descriptor type;
// uniform buffers flagged DYNAMIC_OFFSET go to `dynamicUbo`, all other uniform buffers to `ubo`.
Bitmask Bitmask::fromBackendLayout(descset::DescriptorSetLayout const& layout) {
    Bitmask mask;
    for (auto const& binding: layout.bindings) {
        auto const& stages = binding.stageFlags;
        auto const& index = binding.binding;
        switch (binding.type) {
            case descset::DescriptorType::UNIFORM_BUFFER: {
                bool const isDynamic =
                        binding.flags == descset::DescriptorFlags::DYNAMIC_OFFSET;
                auto& target = isDynamic ? mask.dynamicUbo : mask.ubo;
                target |= fromStageFlags<UniformBufferBitmask>(stages, index);
                break;
            }
            case descset::DescriptorType::SAMPLER: {
                mask.sampler |= fromStageFlags<SamplerBitmask>(stages, index);
                break;
            }
            case descset::DescriptorType::INPUT_ATTACHMENT: {
                mask.inputAttachment |= fromStageFlags<InputAttachmentBitmask>(stages, index);
                break;
            }
        }
    }
    return mask;
}
} // namespace filament::backend

View File

@@ -20,64 +20,48 @@
// This needs to be at the top
#include "DriverBase.h"
#include "VulkanAsyncHandles.h"
#include "VulkanBuffer.h"
#include "VulkanFboCache.h"
#include "VulkanResources.h"
#include "VulkanSwapChain.h"
#include "VulkanTexture.h"
#include "VulkanUtility.h"
#include "vulkan/memory/Resource.h"
#include <private/backend/SamplerGroup.h>
#include <backend/Program.h>
#include <utils/bitset.h>
#include <utils/FixedCapacityVector.h>
#include <utils/Mutex.h>
#include <utils/StructureOfArrays.h>
#include <array>
namespace filament::backend {
namespace {
// Counts the total number of descriptors for both vertex and fragment stages.
//
// The mask is split into a vertex part and a fragment part; each part is shifted down by its
// stage shift and the two are OR-ed together, so a binding index set in both stages is counted
// once. The result is the number of bits set in that merged value.
template<typename Bitmask>
inline uint8_t collapsedCount(Bitmask const& mask) {
    // The computation below goes through a uint64_t, so the mask must not be wider than 64 bits.
    // (sizeof is in bytes, hence the comparison against sizeof(uint64_t) rather than 64.)
    static_assert(sizeof(Bitmask) <= sizeof(uint64_t),
            "collapsedCount only supports masks of at most 64 bits");
    constexpr uint64_t VERTEX_MASK = (1ULL << getFragmentStageShift<Bitmask>()) - 1ULL;
    constexpr uint64_t FRAGMENT_MASK = (VERTEX_MASK << getFragmentStageShift<Bitmask>());
    uint64_t val = mask.getValue();
    val = ((val & VERTEX_MASK) >> getVertexStageShift<Bitmask>()) |
          ((val & FRAGMENT_MASK) >> getFragmentStageShift<Bitmask>());
    return static_cast<uint8_t>(Bitmask(val).count());
}
} // anonymous namespace
using namespace descset;
class VulkanTimestamps;
struct VulkanBufferObject;
struct VulkanDescriptorSetLayout : public HwDescriptorSetLayout, fvkmemory::Resource {
static constexpr uint8_t UNIQUE_DESCRIPTOR_SET_COUNT = 4;
static constexpr uint8_t MAX_BINDINGS = 25;
using DescriptorSetLayoutArray = std::array<VkDescriptorSetLayout,
VulkanDescriptorSetLayout::UNIQUE_DESCRIPTOR_SET_COUNT>;
struct VulkanDescriptorSetLayout : public VulkanResource {
static constexpr uint8_t UNIQUE_DESCRIPTOR_SET_COUNT = 3;
// The bitmask representation of a set layout.
struct Bitmask {
// TODO: better utilize the space below and use bitset instead.
UniformBufferBitmask ubo; // 8 bytes
UniformBufferBitmask dynamicUbo; // 8 bytes
SamplerBitmask sampler; // 8 bytes
InputAttachmentBitmask inputAttachment; // 8 bytes
UniformBufferBitmask ubo = 0; // 4 bytes
UniformBufferBitmask dynamicUbo = 0; // 4 bytes
SamplerBitmask sampler = 0; // 8 bytes
InputAttachmentBitmask inputAttachment = 0; // 1 bytes
// Because this struct is used as a hash key, it must be 8-byte aligned, with no
// unaccounted bytes.
uint8_t padding0 = 0; // 1 bytes
uint16_t padding1 = 0;// 2 bytes
uint32_t padding2 = 0;// 4 bytes
// Two bitmasks are equal when all four descriptor masks match. Padding members are not compared.
bool operator==(Bitmask const& right) const {
    if (!(ubo == right.ubo)) {
        return false;
    }
    if (!(dynamicUbo == right.dynamicUbo)) {
        return false;
    }
    if (!(sampler == right.sampler)) {
        return false;
    }
    return inputAttachment == right.inputAttachment;
}
static Bitmask fromBackendLayout(descset::DescriptorSetLayout const& layout);
};
static_assert(sizeof(Bitmask) == 32);
// This is a convenience struct to quickly check layout compatibility in terms of descriptor set
// pools.
@@ -87,10 +71,6 @@ struct VulkanDescriptorSetLayout : public HwDescriptorSetLayout, fvkmemory::Reso
uint32_t sampler = 0;
uint32_t inputAttachment = 0;
inline uint32_t total() const {
return ubo + dynamicUbo + sampler + inputAttachment;
}
bool operator==(Count const& right) const noexcept {
return ubo == right.ubo && dynamicUbo == right.dynamicUbo && sampler == right.sampler &&
inputAttachment == right.inputAttachment;
@@ -98,10 +78,10 @@ struct VulkanDescriptorSetLayout : public HwDescriptorSetLayout, fvkmemory::Reso
static inline Count fromLayoutBitmask(Bitmask const& mask) {
return {
.ubo = collapsedCount(mask.ubo),
.dynamicUbo = collapsedCount(mask.dynamicUbo),
.sampler = collapsedCount(mask.sampler),
.inputAttachment = collapsedCount(mask.inputAttachment),
.ubo = countBits(collapseStages(mask.ubo)),
.dynamicUbo = countBits(collapseStages(mask.dynamicUbo)),
.sampler = countBits(collapseStages(mask.sampler)),
.inputAttachment = countBits(collapseStages(mask.inputAttachment)),
};
}
@@ -117,71 +97,101 @@ struct VulkanDescriptorSetLayout : public HwDescriptorSetLayout, fvkmemory::Reso
}
};
VulkanDescriptorSetLayout(DescriptorSetLayout const& layout);
static_assert(sizeof(Bitmask) % 8 == 0);
~VulkanDescriptorSetLayout() = default;
explicit VulkanDescriptorSetLayout(VkDevice device, VkDescriptorSetLayoutCreateInfo const& info,
Bitmask const& bitmask);
VkDescriptorSetLayout getVkLayout() const { return mVkLayout; }
void setVkLayout(VkDescriptorSetLayout vklayout) { mVkLayout = vklayout; }
~VulkanDescriptorSetLayout();
VkDevice const mDevice;
VkDescriptorSetLayout const vklayout;
Bitmask const bitmask;
// This is a convenience struct so that we don't have to iterate through all the bits of the
// bitmask (which correspond to binding indices).
struct _Bindings {
utils::FixedCapacityVector<uint8_t> const ubo;
utils::FixedCapacityVector<uint8_t> const dynamicUbo;
utils::FixedCapacityVector<uint8_t> const sampler;
utils::FixedCapacityVector<uint8_t> const inputAttachment;
} bindings;
Count const count;
private:
VkDescriptorSetLayout mVkLayout = VK_NULL_HANDLE;
// Returns the indices of the bits set in `mask`, in ascending order.
// Only the lower half of the mask's bit width (sizeof(mask) * 4 bits) is scanned.
// NOTE(review): presumably callers pass a mask already folded by collapseStages(), so only the
// lower half can be populated -- confirm against collapseStages().
template <typename MaskType>
utils::FixedCapacityVector<uint8_t> bits(MaskType mask) {
    utils::FixedCapacityVector<uint8_t> ret =
            utils::FixedCapacityVector<uint8_t>::with_capacity(countBits(mask));
    for (uint8_t i = 0; i < sizeof(mask) * 4; ++i) {
        // Build the probe bit in MaskType rather than int: with a plain `1 << i`, i == 31 yields
        // a negative int that sign-extends when combined with a 64-bit mask and would test
        // bits 31..63 all at once.
        if (mask & (MaskType(1) << i)) {
            ret.push_back(i);
        }
    }
    return ret;
}
// Expands each descriptor mask of `bitmask` into a list of bit indices: every mask is first
// passed through collapseStages() and then through bits(). The braced initializer fills the
// _Bindings members in declaration order: ubo, dynamicUbo, sampler, inputAttachment.
_Bindings getBindings(Bitmask const& bitmask) {
    return {
        bits(collapseStages(bitmask.ubo)),
        bits(collapseStages(bitmask.dynamicUbo)),
        bits(collapseStages(bitmask.sampler)),
        bits(collapseStages(bitmask.inputAttachment)),
    };
}
};
struct VulkanDescriptorSet : public HwDescriptorSet, fvkmemory::Resource {
using VulkanDescriptorSetLayoutList = std::array<Handle<VulkanDescriptorSetLayout>,
VulkanDescriptorSetLayout::UNIQUE_DESCRIPTOR_SET_COUNT>;
struct VulkanDescriptorSet : public VulkanResource {
public:
// Because we need to recycle descriptor sets not used, we allow for a callback that the "Pool"
// can use to repackage the vk handle.
using OnRecycle = std::function<void(VulkanDescriptorSet*)>;
using OnRecycle = std::function<void()>;
VulkanDescriptorSet(VkDescriptorSet rawSet,
UniformBufferBitmask const& dynamicUboMask,
uint8_t uniqueDynamicUboCount,
OnRecycle&& onRecycleFn)
: vkSet(rawSet),
dynamicUboMask(dynamicUboMask),
uniqueDynamicUboCount(uniqueDynamicUboCount),
VulkanDescriptorSet(VulkanResourceAllocator* allocator,
VkDescriptorSet rawSet, OnRecycle&& onRecycleFn)
: VulkanResource(VulkanResourceType::DESCRIPTOR_SET),
resources(allocator),
vkSet(rawSet),
mOnRecycleFn(std::move(onRecycleFn)) {}
~VulkanDescriptorSet() {
if (mOnRecycleFn) {
mOnRecycleFn(this);
mOnRecycleFn();
}
}
void setOffsets(backend::DescriptorSetOffsetArray&& offsets) noexcept {
mOffsets = std::move(offsets);
}
backend::DescriptorSetOffsetArray const* getOffsets() {
return &mOffsets;
}
void acquire(fvkmemory::resource_ptr<VulkanTexture> texture);
void acquire(fvkmemory::resource_ptr<VulkanBufferObject> buffer);
// TODO: maybe change to fixed size for performance.
VulkanAcquireOnlyResourceManager resources;
VkDescriptorSet const vkSet;
UniformBufferBitmask const dynamicUboMask;
uint8_t const uniqueDynamicUboCount;
private:
backend::DescriptorSetOffsetArray mOffsets;
std::vector<fvkmemory::resource_ptr<fvkmemory::Resource>> mResources;
OnRecycle mOnRecycleFn;
};
using VulkanDescriptorSetList = std::array<Handle<VulkanDescriptorSet>,
VulkanDescriptorSetLayout::UNIQUE_DESCRIPTOR_SET_COUNT>;
using PushConstantNameArray = utils::FixedCapacityVector<char const*>;
using PushConstantNameByStage = std::array<PushConstantNameArray, Program::SHADER_TYPE_COUNT>;
struct PushConstantDescription {
explicit PushConstantDescription(backend::Program const& program) noexcept;
VkPushConstantRange const* getVkRanges() const noexcept { return mRanges; }
uint32_t getVkRangeCount() const noexcept { return mRangeCount; }
void write(VkCommandBuffer cmdbuf, VkPipelineLayout layout, backend::ShaderStage stage,
void write(VulkanCommands* commands, VkPipelineLayout layout, backend::ShaderStage stage,
uint8_t index, backend::PushConstantVariant const& value);
private:
@@ -192,10 +202,12 @@ private:
uint32_t mRangeCount;
};
struct VulkanProgram : public HwProgram, fvkmemory::Resource {
struct VulkanProgram : public HwProgram, VulkanResource {
using BindingList = CappedArray<uint16_t, MAX_SAMPLER_COUNT>;
VulkanProgram(VkDevice device, Program const& builder) noexcept;
~VulkanProgram();
inline VkShaderModule getVertexShader() const {
@@ -204,6 +216,24 @@ struct VulkanProgram : public HwProgram, fvkmemory::Resource {
inline VkShaderModule getFragmentShader() const { return mInfo->shaders[1]; }
inline utils::FixedCapacityVector<uint16_t> const& getBindingToSamplerIndex() const {
return mInfo->bindingToSamplerIndex;
}
// Get a list of the sampler binding indices so that we don't have to loop through all possible
// samplers.
inline BindingList const& getBindings() const { return mInfo->bindings; }
// TODO: this is currently not used. This will replace getLayoutDescriptionList below.
// inline descset::DescriptorSetLayout const& getLayoutDescription() const {
// return mInfo->layout;
// }
// In the usual case, we would have just one layout per program. But in the current setup, we
// have a set/layout for each descriptor type. This will be changed in the future.
using LayoutDescriptionList = std::array<descset::DescriptorSetLayout,
VulkanDescriptorSetLayout::UNIQUE_DESCRIPTOR_SET_COUNT>;
inline LayoutDescriptionList const& getLayoutDescriptionList() const { return mInfo->layouts; }
inline uint32_t getPushConstantRangeCount() const {
return mInfo->pushConstantDescription.getVkRangeCount();
}
@@ -212,9 +242,9 @@ struct VulkanProgram : public HwProgram, fvkmemory::Resource {
return mInfo->pushConstantDescription.getVkRanges();
}
inline void writePushConstant(VkCommandBuffer cmdbuf, VkPipelineLayout layout,
inline void writePushConstant(VulkanCommands* commands, VkPipelineLayout layout,
backend::ShaderStage stage, uint8_t index, backend::PushConstantVariant const& value) {
mInfo->pushConstantDescription.write(cmdbuf, layout, stage, index, value);
mInfo->pushConstantDescription.write(commands, layout, stage, index, value);
}
#if FVK_ENABLED_DEBUG_SAMPLER_NAME
@@ -230,11 +260,30 @@ struct VulkanProgram : public HwProgram, fvkmemory::Resource {
private:
struct PipelineInfo {
explicit PipelineInfo(backend::Program const& program) noexcept
: pushConstantDescription(program)
: bindingToSamplerIndex(MAX_SAMPLER_COUNT, 0xffff),
pushConstantDescription(program)
#if FVK_ENABLED_DEBUG_SAMPLER_NAME
, bindingToName(MAX_SAMPLER_COUNT, "")
#endif
{}
BindingList bindings;
// We store the samplerGroupIndex as the top 8-bit and the index within each group as the lower 8-bit.
utils::FixedCapacityVector<uint16_t> bindingToSamplerIndex;
VkShaderModule shaders[MAX_SHADER_MODULES] = { VK_NULL_HANDLE };
// TODO: Use this instead of `layouts` after Filament-side Descriptor Set API is in place.
// descset::DescriptorSetLayout layout;
LayoutDescriptionList layouts;
PushConstantDescription pushConstantDescription;
#if FVK_ENABLED_DEBUG_SAMPLER_NAME
// We store the sampler name mapped from binding index (only for debug purposes).
utils::FixedCapacityVector<std::string> bindingToName;
#endif
};
PipelineInfo* mInfo;
@@ -249,88 +298,45 @@ private:
//
// We use private inheritance to shield clients from the width / height fields in HwRenderTarget,
// which are not representative when this is the default render target.
struct VulkanRenderTarget : private HwRenderTarget, fvkmemory::Resource {
struct VulkanRenderTarget : private HwRenderTarget, VulkanResource {
// Creates an offscreen render target.
VulkanRenderTarget(VkDevice device, VkPhysicalDevice physicalDevice,
VulkanContext const& context, fvkmemory::ResourceManager* resourceManager,
VmaAllocator allocator, VulkanCommands* commands, uint32_t width, uint32_t height,
VulkanContext const& context, VmaAllocator allocator,
VulkanCommands* commands, uint32_t width, uint32_t height,
uint8_t samples, VulkanAttachment color[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT],
VulkanAttachment depthStencil[2], VulkanStagePool& stagePool, uint8_t layerCount);
~VulkanRenderTarget();
// Creates a special "default" render target (i.e. associated with the swap chain)
explicit VulkanRenderTarget();
void transformClientRectToPlatform(VkRect2D* bounds) const;
void transformViewportToPlatform(VkViewport* bounds) const;
inline VkExtent2D getExtent() const {
return {width, height};
}
inline VulkanAttachment& getColor0() const {
assert_invariant(mInfo->colors[0]);
return mInfo->attachments[0];
}
inline VulkanAttachment& getDepth() const {
assert_invariant(hasDepth());
if (mInfo->fbkey.samples == 1) {
return mInfo->attachments[mInfo->depthIndex];
}
return mInfo->attachments[mInfo->msaaDepthIndex];
}
inline VulkanFboCache::RenderPassKey const& getRenderPassKey() const {
return mInfo->rpkey;
}
inline VulkanFboCache::FboKey const& getFboKey() const {
return mInfo->fbkey;
}
inline uint8_t getSamples() const {
return mInfo->fbkey.samples;
}
uint8_t getColorTargetCount(VulkanRenderPass const& pass) const;
inline bool hasDepth() const { return mInfo->depthIndex != Auxiliary::UNDEFINED_INDEX; }
inline bool isSwapChain() const { return !mOffscreen; }
inline bool isProtected() const { return mProtected; }
void bindToSwapChain(fvkmemory::resource_ptr<VulkanSwapChain> swapchain);
void emitBarriersBeginRenderPass(VulkanCommandBuffer& commands);
void emitBarriersEndRenderPass(VulkanCommandBuffer& commands);
void transformClientRectToPlatform(VkViewport* bounds) const;
VkExtent2D getExtent() const;
// We return references in the following methods to avoid a copy.
VulkanAttachment& getColor(int target);
VulkanAttachment& getMsaaColor(int target);
VulkanAttachment& getDepth();
VulkanAttachment& getMsaaDepth();
uint8_t getColorTargetCount(const VulkanRenderPass& pass) const;
uint8_t getSamples() const { return mSamples; }
uint8_t getLayerCount() const { return mLayerCount; }
bool hasDepth() const { return mDepth.texture; }
bool isSwapChain() const { return !mOffscreen; }
void bindToSwapChain(VulkanSwapChain& surf);
private:
struct Auxiliary {
static constexpr int8_t UNDEFINED_INDEX = -1;
explicit Auxiliary() noexcept = default;
VulkanFboCache::RenderPassKey rpkey = {};
VulkanFboCache::FboKey fbkey = {};
std::vector<VulkanAttachment> attachments;
utils::bitset32 colors;
int8_t depthIndex = UNDEFINED_INDEX;
int8_t msaaDepthIndex = UNDEFINED_INDEX;
int8_t msaaIndex = UNDEFINED_INDEX;
};
bool const mOffscreen;
bool mProtected;
std::unique_ptr<Auxiliary> mInfo;
VulkanAttachment mColor[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT] = {};
VulkanAttachment mDepth = {};
VulkanAttachment mMsaaAttachments[MRT::MAX_SUPPORTED_RENDER_TARGET_COUNT] = {};
VulkanAttachment mMsaaDepthAttachment = {};
const bool mOffscreen : 1;
uint8_t mSamples : 7;
uint8_t mLayerCount = 1;
};
struct VulkanBufferObject;
struct VulkanVertexBufferInfo : public HwVertexBufferInfo, fvkmemory::Resource {
struct VulkanVertexBufferInfo : public HwVertexBufferInfo, VulkanResource {
VulkanVertexBufferInfo(uint8_t bufferCount, uint8_t attributeCount,
AttributeArray const& attributes);
@@ -377,24 +383,33 @@ private:
PipelineInfo mInfo;
};
struct VulkanVertexBuffer : public HwVertexBuffer, fvkmemory::Resource {
VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& stagePool, uint32_t vertexCount,
fvkmemory::resource_ptr<VulkanVertexBufferInfo> vbi);
void setBuffer(fvkmemory::resource_ptr<VulkanBufferObject> bufferObject, uint32_t index);
struct VulkanVertexBuffer : public HwVertexBuffer, VulkanResource {
VulkanVertexBuffer(VulkanContext& context, VulkanStagePool& stagePool,
VulkanResourceAllocator* allocator,
uint32_t vertexCount, Handle<HwVertexBufferInfo> vbih);
inline VkBuffer const* getVkBuffers() const { return mBuffers.data(); }
inline VkBuffer* getVkBuffers() { return mBuffers.data(); }
fvkmemory::resource_ptr<VulkanVertexBufferInfo> vbi;
void setBuffer(VulkanResourceAllocator const& allocator,
VulkanBufferObject* bufferObject, uint32_t index);
inline VkBuffer const* getVkBuffers() const {
return mBuffers.data();
}
inline VkBuffer* getVkBuffers() {
return mBuffers.data();
}
Handle<HwVertexBufferInfo> vbih;
private:
utils::FixedCapacityVector<VkBuffer> mBuffers;
std::vector<fvkmemory::resource_ptr<VulkanBufferObject>> mResources;
FixedSizeVulkanResourceManager<MAX_VERTEX_BUFFER_COUNT> mResources;
};
struct VulkanIndexBuffer : public HwIndexBuffer, fvkmemory::Resource {
struct VulkanIndexBuffer : public HwIndexBuffer, VulkanResource {
VulkanIndexBuffer(VmaAllocator allocator, VulkanStagePool& stagePool, uint8_t elementSize,
uint32_t indexCount)
: HwIndexBuffer(elementSize, indexCount),
VulkanResource(VulkanResourceType::INDEX_BUFFER),
buffer(allocator, stagePool, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, elementSize * indexCount),
indexType(elementSize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32) {}
@@ -402,7 +417,7 @@ struct VulkanIndexBuffer : public HwIndexBuffer, fvkmemory::Resource {
const VkIndexType indexType;
};
struct VulkanBufferObject : public HwBufferObject, fvkmemory::Resource {
struct VulkanBufferObject : public HwBufferObject, VulkanResource {
VulkanBufferObject(VmaAllocator allocator, VulkanStagePool& stagePool, uint32_t byteCount,
BufferObjectBinding bindingType);
@@ -410,13 +425,63 @@ struct VulkanBufferObject : public HwBufferObject, fvkmemory::Resource {
const BufferObjectBinding bindingType;
};
struct VulkanRenderPrimitive : public HwRenderPrimitive, fvkmemory::Resource {
VulkanRenderPrimitive(PrimitiveType pt, fvkmemory::resource_ptr<VulkanVertexBuffer> vb,
fvkmemory::resource_ptr<VulkanIndexBuffer> ib);
~VulkanRenderPrimitive() = default;
struct VulkanSamplerGroup : public HwSamplerGroup, VulkanResource {
// NOTE: we have to use out-of-line allocation here because the size of a Handle<> is limited
std::unique_ptr<SamplerGroup> sb;// FIXME: this shouldn't depend on filament::SamplerGroup
explicit VulkanSamplerGroup(size_t size) noexcept
: VulkanResource(VulkanResourceType::SAMPLER_GROUP),
sb(new SamplerGroup(size)) {}
};
fvkmemory::resource_ptr<VulkanVertexBuffer> vertexBuffer;
fvkmemory::resource_ptr<VulkanIndexBuffer> indexBuffer;
// Render primitive that refers to one vertex buffer and one index buffer.
struct VulkanRenderPrimitive : public HwRenderPrimitive, VulkanResource {
// Takes the primitive type plus handles to the vertex and index buffers; `resourceAllocator`
// is also handed to the internal resource manager (see the out-of-line definition).
VulkanRenderPrimitive(VulkanResourceAllocator* resourceAllocator,
PrimitiveType pt, Handle<HwVertexBuffer> vbh, Handle<HwIndexBuffer> ibh);
// Explicitly empties the resource manager when the primitive is destroyed.
~VulkanRenderPrimitive() {
mResources.clear();
}
// Raw pointers to the buffers; both default to nullptr until the constructor body sets them.
VulkanVertexBuffer* vertexBuffer = nullptr;
VulkanIndexBuffer* indexBuffer = nullptr;
private:
// Keep references to the vertex buffer and the index buffer.
// The capacity of 2 matches those two buffers.
FixedSizeVulkanResourceManager<2> mResources;
};
// Fence handle object wrapping a shared VulkanCmdFence.
struct VulkanFence : public HwFence, VulkanResource {
    // Creates a fence object with no underlying VulkanCmdFence attached (`fence` stays null).
    VulkanFence()
        : VulkanResource(VulkanResourceType::FENCE) {}

    // Creates a fence object sharing ownership of `fence`.
    // The by-value parameter is moved into the member to avoid an extra shared_ptr copy.
    explicit VulkanFence(std::shared_ptr<VulkanCmdFence> fence)
        : VulkanResource(VulkanResourceType::FENCE),
          fence(std::move(fence)) {}

    std::shared_ptr<VulkanCmdFence> fence;
};
// Timer query identified by a pair of query indices (start and stop) and an optional fence
// that is used to decide whether the query has completed.
struct VulkanTimerQuery : public HwTimerQuery, VulkanThreadSafeResource {
// `indices` is (starting query index, stopping query index).
explicit VulkanTimerQuery(std::tuple<uint32_t, uint32_t> indices);
~VulkanTimerQuery();
// Stores the fence later inspected by isCompleted().
void setFence(std::shared_ptr<VulkanCmdFence> fence) noexcept;
// Reports whether a fence has been set and its status is VK_SUCCESS.
bool isCompleted() noexcept;
// Index of the query slot for the start of the measured range.
uint32_t getStartingQueryIndex() const {
return mStartingQueryIndex;
}
// Index of the query slot for the end of the measured range.
uint32_t getStoppingQueryIndex() const {
return mStoppingQueryIndex;
}
private:
uint32_t mStartingQueryIndex;
uint32_t mStoppingQueryIndex;
// Both setFence() and isCompleted() lock mFenceMutex before touching mFence.
std::shared_ptr<VulkanCmdFence> mFence;
utils::Mutex mFenceMutex;
};
inline constexpr VkBufferUsageFlagBits getBufferObjectUsage(

View File

@@ -131,18 +131,14 @@ getVkTransition(const VulkanLayoutTransition& transition) {
}// anonymous namespace
bool transitionLayout(VkCommandBuffer cmdbuffer,
void transitionLayout(VkCommandBuffer cmdbuffer,
VulkanLayoutTransition transition) {
if (transition.oldLayout == transition.newLayout) {
return false;
return;
}
auto [srcAccessMask, dstAccessMask, srcStage, dstStage, oldLayout, newLayout]
= getVkTransition(transition);
if (oldLayout == newLayout) {
return false;
}
assert_invariant(transition.image != VK_NULL_HANDLE && "No image for transition");
VkImageMemoryBarrier barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -156,7 +152,6 @@ bool transitionLayout(VkCommandBuffer cmdbuffer,
.subresourceRange = transition.subresources,
};
vkCmdPipelineBarrier(cmdbuffer, srcStage, dstStage, 0, 0, nullptr, 0, nullptr, 1, &barrier);
return true;
}
}// namespace filament::backend

View File

@@ -76,6 +76,36 @@ inline VkImageViewType getViewType(SamplerType target) {
}
}
// Picks the default layout for a texture from its usage flags. Depth attachments take priority
// (DEPTH_SAMPLER when also sampleable, DEPTH_ATTACHMENT otherwise), then color attachments;
// everything else defaults to READ_ONLY.
inline VulkanLayout getDefaultLayout(TextureUsage usage) {
    bool const isDepthAttachment = any(usage & TextureUsage::DEPTH_ATTACHMENT);
    if (isDepthAttachment) {
        bool const isSampleable = any(usage & TextureUsage::SAMPLEABLE);
        return isSampleable ? VulkanLayout::DEPTH_SAMPLER : VulkanLayout::DEPTH_ATTACHMENT;
    }

    bool const isColorAttachment = any(usage & TextureUsage::COLOR_ATTACHMENT);
    if (isColorAttachment) {
        return VulkanLayout::COLOR_ATTACHMENT;
    }

    // Finally, the layout for an immutable texture is optimal read-only.
    return VulkanLayout::READ_ONLY;
}
// Overload taking Vulkan image usage flags: translates the depth/stencil-attachment,
// color-attachment and sampled bits into the matching TextureUsage flags and defers to the
// TextureUsage overload. All other Vulkan usage bits are ignored.
inline VulkanLayout getDefaultLayout(VkImageUsageFlags vkusage) {
    TextureUsage usage{};
    // Adds `flag` to `usage` when `vkbit` is present in `vkusage`.
    auto const addIfSet = [&usage, vkusage](VkImageUsageFlags vkbit, TextureUsage flag) {
        if (vkusage & vkbit) {
            usage = usage | flag;
        }
    };
    addIfSet(VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, TextureUsage::DEPTH_ATTACHMENT);
    addIfSet(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, TextureUsage::COLOR_ATTACHMENT);
    addIfSet(VK_IMAGE_USAGE_SAMPLED_BIT, TextureUsage::SAMPLEABLE);
    return getDefaultLayout(usage);
}
constexpr inline VkImageLayout getVkLayout(VulkanLayout layout) {
switch (layout) {
case VulkanLayout::UNDEFINED:
@@ -105,9 +135,7 @@ constexpr inline VkImageLayout getVkLayout(VulkanLayout layout) {
}
}
// Returns true if a transition has been added to the command buffer, false otherwise (i.e. when
// no transition is necessary).
bool transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition);
void transitionLayout(VkCommandBuffer cmdbuffer, VulkanLayoutTransition transition);
} // namespace imgutil

Some files were not shown because too many files have changed in this diff Show More