Compare commits

...

8 Commits

Author SHA1 Message Date
Run Yu
44ef0af4a4 immediately GetMappedRange after MapAsync still would return null pointer 2025-11-21 16:53:03 -05:00
Run Yu
c9b5bfc4a4 Use a stage pool for WebGPU, but somehow GetMappedRange would sometimes return null, which breaks the code. 2025-11-21 10:57:02 -05:00
Doris Wu
f07176c0a2 Try to fix test_UboBatching (#9448) 2025-11-20 15:14:19 +08:00
Doris Wu
15db141c7a Add some unit tests for UboManager (#9446) 2025-11-20 14:28:13 +08:00
Doris Wu
d4bbb7c591 buffer update opt: Some optimizations (#9438) 2025-11-20 00:52:04 +00:00
Sungun Park
92e620d2ad Simplify buffer object creation (#9436)
Simplify the buffer object creation logic to streamline and help make
the future integration of asynchronous features easier.
2025-11-19 22:03:05 +00:00
Mathias Agopian
311104da97 update google benchmark library to 1.9.4 (#9441)
* benchmark: update README and add update script

* update google benchmark library to 1.9.4

* update tnt CMakeLists to match the library new version
2025-11-19 11:50:34 -08:00
Filament Bot
3127632f96 [automated] Updating /docs due to commit 59f611b
Full commit hash is 59f611bfde

DOCS_ALLOW_DIRECT_EDITS
2025-11-19 19:32:56 +00:00
202 changed files with 20125 additions and 7899 deletions

View File

@@ -181,7 +181,7 @@ important for <code>matc</code> (material compiler).</p>
}
dependencies {
implementation 'com.google.android.filament:filament-android:1.66.2'
implementation 'com.google.android.filament:filament-android:1.67.1'
}
</code></pre>
<p>Here are all the libraries available in the group <code>com.google.android.filament</code>:</p>
@@ -196,7 +196,7 @@ dependencies {
</div>
<h3 id="ios"><a class="header" href="#ios">iOS</a></h3>
<p>iOS projects can use CocoaPods to install the latest release:</p>
<pre><code class="language-shell">pod 'Filament', '~&gt; 1.66.2'
<pre><code class="language-shell">pod 'Filament', '~&gt; 1.67.1'
</code></pre>
<h2 id="documentation"><a class="header" href="#documentation">Documentation</a></h2>
<ul>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -316,6 +316,8 @@ if (FILAMENT_SUPPORTS_WEBGPU)
src/webgpu/WebGPURenderPrimitive.h
src/webgpu/WebGPURenderTarget.cpp
src/webgpu/WebGPURenderTarget.h
src/webgpu/WebGPUStagePool.cpp
src/webgpu/WebGPUStagePool.h
src/webgpu/WebGPUStrings.h
src/webgpu/WebGPUSwapChain.cpp
src/webgpu/WebGPUSwapChain.h

View File

@@ -18,6 +18,7 @@
#include "WebGPUConstants.h"
#include "WebGPUQueueManager.h"
#include "WebGPUStagePool.h"
#include "DriverBase.h"
#include <backend/BufferDescriptor.h>
@@ -29,6 +30,7 @@
#include <cstdint>
#include <cstring>
#include <iostream>
namespace filament::backend {
@@ -65,7 +67,7 @@ WebGPUBufferBase::WebGPUBufferBase(wgpu::Device const& device, const wgpu::Buffe
// of 4 by padding with zeros.
void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
const uint32_t byteOffset, wgpu::Device const& device,
WebGPUQueueManager* const webGPUQueueManager) {
WebGPUQueueManager* const webGPUQueueManager, WebGPUStagePool* const webGPUStagePool) {
FILAMENT_CHECK_PRECONDITION(bufferDescriptor.buffer)
<< "updateGPUBuffer called with a null buffer";
FILAMENT_CHECK_PRECONDITION(bufferDescriptor.size + byteOffset <= mBuffer.GetSize())
@@ -79,34 +81,54 @@ void WebGPUBufferBase::updateGPUBuffer(BufferDescriptor const& bufferDescriptor,
// This may have some performance implications. That should be investigated later.
assert_invariant(mBuffer.GetUsage() & wgpu::BufferUsage::CopyDst);
// Calculate some alignment related sizes
// Calculate some alignment related sizes
const size_t remainder = bufferDescriptor.size % FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS;
const size_t mainBulk = bufferDescriptor.size - remainder;
const size_t stagingBufferSize =
remainder == 0 ? bufferDescriptor.size : mainBulk + FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS;
// create a staging buffer
wgpu::BufferDescriptor descriptor{
.label = "Filament WebGPU Staging Buffer",
.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc,
.size = stagingBufferSize,
.mappedAtCreation = true };
wgpu::Buffer stagingBuffer = device.CreateBuffer(&descriptor);
Stage stage = webGPUStagePool->acquireBuffer(stagingBufferSize);
void* mappedRange = stagingBuffer.GetMappedRange();
memcpy(mappedRange, bufferDescriptor.buffer, bufferDescriptor.size);
std::string mappedRangeIsNull = stage.mappedRange
? "no"
: "yes";
std::cout << "Run Yu: got mapped range on the staging buffer with size "
<< stage.buffer.GetSize() << " and it is null? " << mappedRangeIsNull << std::endl;
memcpy(stage.mappedRange, bufferDescriptor.buffer, bufferDescriptor.size);
// Make sure the padded memory is set to 0 to have deterministic behaviors
if (remainder != 0) {
uint8_t* paddingStart = static_cast<uint8_t*>(mappedRange) + bufferDescriptor.size;
memset(paddingStart, 0, FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS - remainder);
}
stagingBuffer.Unmap();
stage.buffer.Unmap();
std::cout << "Run Yu: about to issue copy command with actual staging buffer of size "
<< stage.buffer.GetSize() << ", and computed size of " << stagingBufferSize
<< ". The mBuffer size is " << mBuffer.GetSize() << std::endl;
// Copy the staging buffer contents to the destination buffer.
webGPUQueueManager->getCommandEncoder().CopyBufferToBuffer(stagingBuffer, 0, mBuffer,
byteOffset, stagingBufferSize);
webGPUQueueManager->getCommandEncoder().CopyBufferToBuffer(stage.buffer, 0, mBuffer,
byteOffset,
remainder == 0 ? bufferDescriptor.size
: mainBulk + FILAMENT_WEBGPU_BUFFER_SIZE_MODULUS);
webGPUQueueManager->flush();
// Re-map the staging buffer asynchronously and, once the mapping succeeds, hand it back to the
// pool so that a later update can reuse it.
//
// The WHOLE buffer is mapped, not only the stagingBufferSize bytes used by this update: the pool
// may have handed out a buffer that is larger than what was asked for, and the callback below
// calls GetMappedRange() with its default arguments, i.e. it requests the entire buffer. Requesting
// more than what was mapped yields a null pointer, which would then poison the pool.
const size_t mapSize = static_cast<size_t>(stage.buffer.GetSize());
stage.buffer.MapAsync(wgpu::MapMode::Write, 0, mapSize, wgpu::CallbackMode::AllowSpontaneous,
        // The buffer handle and the pool pointer are captured by value because the callback runs
        // after this function has returned.
        // NOTE(review): webGPUStagePool is a raw pointer; this assumes the pool outlives every
        // pending map request -- confirm against the driver's shutdown order.
        [stagingBuffer = stage.buffer, webGPUStagePool](wgpu::MapAsyncStatus status,
                const char* message) {
            if (UTILS_LIKELY(status == wgpu::MapAsyncStatus::Success)) {
                std::cout << "Run Yu: successfully mapped a buffer with size "
                          << stagingBuffer.GetSize() << std::endl;
                void* mappedRange = stagingBuffer.GetMappedRange();
                if (!mappedRange) {
                    // Without a mapped pointer the buffer is useless to the next caller, so it is
                    // not returned to the pool.
                    std::cout << "Run Yu: MAPPED RANGE IS NULL RIGHT AWAY!!\n";
                    return;
                }
                webGPUStagePool->addBufferToPool(stagingBuffer, mappedRange);
            } else {
                std::cout << "Run Yu: MAPPING UNSUCCESSFUL!!\n";
            }
        });
}
} // namespace filament::backend

View File

@@ -25,6 +25,7 @@ namespace filament::backend {
class BufferDescriptor;
class WebGPUQueueManager;
class WebGPUStagePool;
/**
* A base class for WebGPU buffer objects, providing common functionality for creating and
@@ -40,7 +41,7 @@ public:
* ensures the calls happen in the expected sequence.
*/
void updateGPUBuffer(BufferDescriptor const&, uint32_t byteOffset, wgpu::Device const& device,
WebGPUQueueManager* const webGPUQueueManager);
WebGPUQueueManager* const webGPUQueueManager, WebGPUStagePool* const webGPUStagePool);
[[nodiscard]] wgpu::Buffer const& getBuffer() const { return mBuffer; }

View File

@@ -107,6 +107,7 @@ WebGPUDriver::WebGPUDriver(WebGPUPlatform& platform,
mAdapter{ mPlatform.requestAdapter(nullptr) },
mDevice{ mPlatform.requestDevice(mAdapter) },
mQueueManager{ mDevice },
mStagePool{ mDevice },
mPipelineLayoutCache{ mDevice },
mPipelineCache{ mDevice },
mRenderPassMipmapGenerator{ mDevice, &mQueueManager },
@@ -856,7 +857,7 @@ void WebGPUDriver::updateIndexBuffer(Handle<HwIndexBuffer> indexBufferHandle,
// draw calls are made.
flush();
handleCast<WebGPUIndexBuffer>(indexBufferHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
scheduleDestroy(std::move(bufferDescriptor));
}
@@ -867,14 +868,14 @@ void WebGPUDriver::updateBufferObject(Handle<HwBufferObject> bufferObjectHandle,
// draw calls are made.
flush();
handleCast<WebGPUBufferObject>(bufferObjectHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
scheduleDestroy(std::move(bufferDescriptor));
}
void WebGPUDriver::updateBufferObjectUnsynchronized(Handle<HwBufferObject> bufferObjectHandle,
BufferDescriptor&& bufferDescriptor, const uint32_t byteOffset) {
handleCast<WebGPUBufferObject>(bufferObjectHandle)
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager);
->updateGPUBuffer(bufferDescriptor, byteOffset, mDevice, &mQueueManager, &mStagePool);
scheduleDestroy(std::move(bufferDescriptor));
}

View File

@@ -25,6 +25,7 @@
#include "webgpu/WebGPUPipelineLayoutCache.h"
#include "webgpu/WebGPURenderPassMipmapGenerator.h"
#include "webgpu/WebGPUQueueManager.h"
#include "webgpu/WebGPUStagePool.h"
#include "webgpu/utils/AsyncTaskCounter.h"
#include <backend/platforms/WebGPUPlatform.h>
@@ -81,6 +82,7 @@ private:
wgpu::Device mDevice = nullptr;
wgpu::Limits mDeviceLimits = {};
WebGPUQueueManager mQueueManager;
WebGPUStagePool mStagePool;
void* mNativeWindow = nullptr;
WebGPUSwapChain* mSwapChain = nullptr;
uint64_t mNextFakeHandle = 1;

View File

@@ -0,0 +1,86 @@
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "WebGPUStagePool.h"
#include "WebGPUConstants.h"
#include <iostream>
namespace filament::backend {
WebGPUStagePool::WebGPUStagePool(wgpu::Device const& device) : mDevice(device) {}
// Nothing is released by hand here: the pooled stages, the mutex and the device handle are
// destroyed by their own destructors.
WebGPUStagePool::~WebGPUStagePool() = default;
// Hands out a staging buffer of at least requiredSize bytes together with the pointer to its
// mapped memory.
//
// The smallest pooled buffer that is large enough is reused when there is one; it is removed from
// the pool, so it is not handed out again until it is given back through addBufferToPool().
// Pooled entries that turn out to be unusable (null mapped pointer, or not in the Mapped state)
// are discarded instead of being returned, because the returned mappedRange is meant to be
// written to directly. When nothing suitable is pooled, a new buffer of exactly requiredSize
// bytes is created.
Stage WebGPUStagePool::acquireBuffer(size_t requiredSize) {
    std::cout << "Run Yu: required size in acquireBuffer: " << requiredSize << std::endl;
    {
        std::lock_guard<std::mutex> lock(mMutex);
        // addBufferToPool() mutates mBuffers under mMutex, so even this diagnostic read of the
        // pool size has to happen with the lock held.
        std::cout << "Run Yu: the pool size is " << mBuffers.size() << std::endl;
        auto iter = mBuffers.lower_bound(requiredSize);
        while (iter != mBuffers.end()) {
            // Copy the entry out first: erasing invalidates the reference into the map.
            Stage candidate{ .buffer = iter->second.buffer,
                .mappedRange = iter->second.mappedRange };
            iter = mBuffers.erase(iter);
            std::cout << "Run Yu: found buffer in the pool with size "
                      << candidate.buffer.GetSize() << std::endl;
            const bool usable = candidate.mappedRange != nullptr &&
                    candidate.buffer.GetMapState() == wgpu::BufferMapState::Mapped;
            if (usable) {
                return candidate;
            }
            // Drop the stale entry and try the next (equal or larger) buffer.
            std::cout << "Run Yu: buffer from pool is not mapped!!" << std::endl;
        }
    }
    // Nothing reusable in the pool: allocate outside of the lock.
    wgpu::Buffer newBuffer = createNewBuffer(requiredSize);
    return { .buffer = newBuffer, .mappedRange = newBuffer.GetMappedRange() };
}
// Returns a staging buffer to the pool so that acquireBuffer() can hand it out again.
//
// buffer:      the staging buffer to recycle; it is indexed by its size.
// mappedRange: the pointer to the buffer's mapped memory, stored alongside the buffer.
//
// Only buffers that are currently mapped and come with a non-null mapped pointer are accepted:
// every entry of the pool is expected to be immediately writable. Anything else is logged and
// ignored.
void WebGPUStagePool::addBufferToPool(wgpu::Buffer buffer, void* mappedRange) {
    if (!mappedRange || buffer.GetMapState() != wgpu::BufferMapState::Mapped) {
        std::cout << "Run Yu: refusing to pool an unmapped buffer with size " << buffer.GetSize()
                  << std::endl;
        return;
    }
    std::lock_guard<std::mutex> lock(mMutex);
    std::cout << "Run Yu: adding buffer to the pool with size " << buffer.GetSize() << std::endl;
    mBuffers.emplace(buffer.GetSize(), Stage{ .buffer = buffer, .mappedRange = mappedRange });
    std::cout << "Run Yu: added buffer to the pool with size " << buffer.GetSize() << std::endl;
    // Diagnostic sweep: report any pooled buffer that is no longer in the Mapped state.
    bool allMapped = true;
    for (const auto& entry : mBuffers) {
        const auto state = entry.second.buffer.GetMapState();
        if (state != wgpu::BufferMapState::Mapped) {
            allMapped = false;
            std::cout << "Run Yu: the buffer with size " << entry.second.buffer.GetSize()
                      << " is not mapped but somehow was added to the pool, its state is "
                      << static_cast<int>(state) << std::endl;
        }
    }
    if (!allMapped) {
        std::cout << "Run Yu: found buffers that are not mapped\n";
    } else {
        std::cout << "Run Yu: all buffers are mapped\n";
    }
}
// Asks the device for a new staging buffer of bufferSize bytes. The buffer is requested with
// MapWrite | CopySrc usage and with mappedAtCreation set.
wgpu::Buffer WebGPUStagePool::createNewBuffer(size_t bufferSize) {
    std::cout << "Run Yu: creating new buffer with size " << bufferSize << std::endl;
    wgpu::BufferDescriptor stagingDescriptor{};
    stagingDescriptor.label = "Filament WebGPU Staging Buffer";
    stagingDescriptor.usage = wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc;
    stagingDescriptor.size = bufferSize;
    stagingDescriptor.mappedAtCreation = true;
    return mDevice.CreateBuffer(&stagingDescriptor);
}
} // namespace filament::backend

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H
#define TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H
#include <webgpu/webgpu_cpp.h>
#include <cstddef>
#include <cstdint>
#include <map>
#include <mutex>
namespace filament::backend {
// A staging buffer paired with the pointer to its mapped memory.
struct Stage {
    // The staging buffer itself.
    wgpu::Buffer buffer;
    // Pointer obtained from the buffer's GetMappedRange(). Defaults to null so that a
    // default-constructed Stage never carries an indeterminate pointer.
    void* mappedRange = nullptr;
};
// A pool of staging buffers, indexed by size, that lets buffer uploads reuse staging buffers
// instead of creating a new one for every update.
//
// acquireBuffer() and addBufferToPool() both take mMutex before touching the pool.
class WebGPUStagePool {
public:
    // device is used to create new staging buffers when the pool has nothing suitable.
    explicit WebGPUStagePool(wgpu::Device const& device);
    ~WebGPUStagePool();
    // Returns a buffer of at least requiredSize bytes along with the pointer to its mapped
    // memory. A pooled buffer is reused (and removed from the pool) when one is large enough;
    // otherwise a new buffer of requiredSize bytes is created.
    Stage acquireBuffer(size_t requiredSize);
    // Puts a buffer (and the pointer to its mapped memory) into the pool for later reuse.
    void addBufferToPool(wgpu::Buffer buffer, void* mappedRange);
private:
    // Creates a staging buffer of bufferSize bytes on mDevice.
    wgpu::Buffer createNewBuffer(size_t bufferSize);
    // Available buffers keyed by their size in bytes. The key is 64-bit wide, so neither a
    // size_t lookup nor the buffer size used on insertion is truncated.
    std::multimap<uint64_t, Stage> mBuffers;
    // Guards mBuffers.
    mutable std::mutex mMutex;
    wgpu::Device mDevice;
};
}
#endif // TNT_FILAMENT_BACKEND_WEBGPUSTAGEPOOL_H

View File

@@ -16,6 +16,7 @@
#include "details/BufferAllocator.h"
#include <private/utils/Tracing.h>
#include <utils/Panic.h>
#include <utils/debug.h>
@@ -169,6 +170,7 @@ void BufferAllocator::releaseGpu(AllocationId id) {
}
void BufferAllocator::releaseFreeSlots() {
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
if (!mHasPendingFrees) {
return;
}

View File

@@ -718,8 +718,7 @@ void FEngine::prepare() {
if (useUboBatching) {
assert_invariant(mUboManager != nullptr);
mUboManager->beginFrame(driver, mMaterialInstances);
mUboManager->beginFrame(driver);
}
UboManager* uboManager = mUboManager;
@@ -758,7 +757,7 @@ void FEngine::gc() {
void FEngine::submitFrame() {
if (isUboBatchingEnabled()) {
DriverApi& driver = getDriverApi();
getUboManager()->endFrame(driver, getMaterialInstanceResourceList());
getUboManager()->endFrame(driver);
}
}
@@ -1285,11 +1284,6 @@ UTILS_NOINLINE
bool FEngine::destroy(const FMaterialInstance* p) {
if (p == nullptr) return true;
if (p->isUsingUboBatching()) {
assert_invariant(isUboBatchingEnabled());
mUboManager->retireSlot(p->getAllocationId());
}
// Check that the material instance we're destroying is not in use in the RenderableManager
// To do this, we currently need to inspect all render primitives in the RenderableManager
EntityManager const& em = mEntityManager;

View File

@@ -95,6 +95,7 @@ FMaterialInstance::FMaterialInstance(FEngine& engine, FMaterial const* material,
if (mUseUboBatching) {
mUboData = BufferAllocator::UNALLOCATED;
engine.getUboManager()->manageMaterialInstance(this);
} else {
mUboData = driver.createBufferObject(mUniforms.getSize(), BufferObjectBinding::UNIFORM,
BufferUsage::STATIC, ImmutableCString{ material->getName().c_str_safe() });
@@ -167,6 +168,7 @@ FMaterialInstance::FMaterialInstance(FEngine& engine,
if (mUseUboBatching) {
mUboData = BufferAllocator::UNALLOCATED;
engine.getUboManager()->manageMaterialInstance(this);
} else {
mUboData = driver.createBufferObject(mUniforms.getSize(), BufferObjectBinding::UNIFORM,
BufferUsage::DYNAMIC, ImmutableCString{ material->getName().c_str_safe() });
@@ -211,6 +213,10 @@ FMaterialInstance::~FMaterialInstance() noexcept = default;
void FMaterialInstance::terminate(FEngine& engine) {
FEngine::DriverApi& driver = engine.getDriverApi();
mDescriptorSet.terminate(driver);
if (mUseUboBatching) {
engine.getUboManager()->unmanageMaterialInstance(this);
}
auto* ubHandle = std::get_if<Handle<HwBufferObject>>(&mUboData);
if (ubHandle){
driver.destroyBufferObject(*ubHandle);

View File

@@ -48,6 +48,7 @@ void UboManager::FenceManager::track(DriverApi& driver, std::unordered_set<Alloc
void UboManager::FenceManager::reclaimCompletedResources(DriverApi& driver,
std::function<void(AllocationId)> const& onReclaimed) {
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
uint32_t signaledCount = 0;
bool seenSignaledFence = false;
@@ -115,8 +116,7 @@ UboManager::UboManager(DriverApi& driver, allocation_size_t defaultSlotSizeInByt
reallocate(driver, defaultTotalSizeInBytes);
}
void UboManager::beginFrame(DriverApi& driver,
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>& materialInstances) {
void UboManager::beginFrame(DriverApi& driver) {
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
// Check finished frames and decrement GPU count accordingly.
mFenceManager.reclaimCompletedResources(driver,
@@ -126,7 +126,7 @@ void UboManager::beginFrame(DriverApi& driver,
mAllocator.releaseFreeSlots();
// Traverse all MIs and see which of them need slot allocation.
if (allocateOnDemand(materialInstances) == SUCCESS) {
if (allocateOnDemand() == SUCCESS) {
// No need to grow the buffer, so we can just map the buffer for writing and return.
mMemoryMappedBufferHandle = driver.mapBuffer(mUbHandle, 0, mUboSize, MapBufferAccessFlags::WRITE_BIT,
"UboManager");
@@ -135,25 +135,19 @@ void UboManager::beginFrame(DriverApi& driver,
}
// Calculate the required size and grow the Ubo.
const allocation_size_t requiredSize = calculateRequiredSize(materialInstances);
const allocation_size_t requiredSize = calculateRequiredSize();
reallocate(driver, requiredSize);
// Allocate slots for each MI on the new Ubo.
allocateAllInstances(materialInstances);
allocateAllInstances();
// Map the buffer so that we can write to it
mMemoryMappedBufferHandle =
driver.mapBuffer(mUbHandle, 0, mUboSize, MapBufferAccessFlags::WRITE_BIT, "UboManager");
// Invalidate the migrated MIs, so that next commit() call must be triggered.
for (const auto& materialInstance : materialInstances) {
materialInstance.second.forEach([](const FMaterialInstance* mi) {
if (!mi->isUsingUboBatching()) {
return;
}
mi->getUniformBuffer().invalidate();
});
for (const auto* mi : mManagedInstances) {
mi->getUniformBuffer().invalidate();
}
}
@@ -164,24 +158,16 @@ void UboManager::finishBeginFrame(DriverApi& driver) {
}
}
void UboManager::endFrame(DriverApi& driver,
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>& materialInstances) {
BufferAllocator& allocator = mAllocator;
void UboManager::endFrame(DriverApi& driver) {
std::unordered_set<AllocationId> allocationIds;
for (const auto& materialInstance : materialInstances) {
materialInstance.second.forEach([&allocator, &allocationIds](const FMaterialInstance* mi) {
if (!mi->isUsingUboBatching()) {
return;
}
for (const auto* mi : mManagedInstances) {
const AllocationId id = mi->getAllocationId();
if (UTILS_UNLIKELY(!BufferAllocator::isValid(id))) {
continue;
}
const AllocationId id = mi->getAllocationId();
if (!BufferAllocator::isValid(id)) {
return;
}
allocator.acquireGpu(id);
allocationIds.insert(id);
});
mAllocator.acquireGpu(id);
allocationIds.insert(id);
}
mFenceManager.track(driver, std::move(allocationIds));
@@ -194,76 +180,90 @@ void UboManager::terminate(DriverApi& driver) {
void UboManager::updateSlot(DriverApi& driver, AllocationId id,
BufferDescriptor bufferDescriptor) const {
if (!mMemoryMappedBufferHandle)
if (!mMemoryMappedBufferHandle) {
return;
}
const allocation_size_t offset = mAllocator.getAllocationOffset(id);
driver.copyToMemoryMappedBuffer(mMemoryMappedBufferHandle, offset, std::move(bufferDescriptor));
}
void UboManager::retireSlot(BufferAllocator::AllocationId id) {
if (!BufferAllocator::isValid(id))
return;
mAllocator.retire(id);
void UboManager::manageMaterialInstance(FMaterialInstance* instance) {
mPendingInstances.insert(instance);
}
UboManager::AllocationResult UboManager::allocateOnDemand(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances) {
// Collect all MIs that need allocation into two groups.
std::vector<FMaterialInstance*> newInstances;
std::vector<FMaterialInstance*> existingInstances;
for (const auto& [_, miList] : materialInstances) {
miList.forEach([&](FMaterialInstance* mi) {
if (!mi->isUsingUboBatching()) {
return;
}
if (BufferAllocator::isValid(mi->getAllocationId())) {
existingInstances.push_back(mi);
} else {
newInstances.push_back(mi);
}
});
void UboManager::unmanageMaterialInstance(FMaterialInstance* materialInstance) {
AllocationId id = materialInstance->getAllocationId();
mPendingInstances.erase(materialInstance);
mManagedInstances.erase(materialInstance);
if (!BufferAllocator::isValid(id)) {
return;
}
mAllocator.retire(id);
materialInstance->assignUboAllocation(mUbHandle, BufferAllocator::UNALLOCATED, 0);
}
UboManager::AllocationResult UboManager::allocateOnDemand() {
FILAMENT_TRACING_CALL(FILAMENT_TRACING_CATEGORY_FILAMENT);
bool reallocationNeeded = false;
// Pass 1: Allocate slots for new material instances (that don't have a slot yet).
for (FMaterialInstance* mi : newInstances) {
for (auto* mi : mPendingInstances) {
mManagedInstances.insert(mi);
auto [newId, newOffset] = mAllocator.allocate(mi->getUniformBuffer().getSize());
// Even if the newId is not valid, we assign it to the MI so that the following process knows
// this material instance was not allocated successfully. Then we can calculate the new
// required UBO size properly.
mi->assignUboAllocation(mUbHandle, newId, newOffset);
if (!BufferAllocator::isValid(newId)) {
reallocationNeeded = true;
}
}
mPendingInstances.clear();
// Pass 2: Allocate slots for existing material instances that need to be orphaned.
for (FMaterialInstance* mi : existingInstances) {
if (mi->getUniformBuffer().isDirty() && mAllocator.isLockedByGpu(mi->getAllocationId())) {
mAllocator.retire(mi->getAllocationId());
auto [newId, newOffset] = mAllocator.allocate(mi->getUniformBuffer().getSize());
mi->assignUboAllocation(mUbHandle, newId, newOffset);
if (!BufferAllocator::isValid(newId)) {
reallocationNeeded = true;
}
for (auto* mi: mManagedInstances) {
if (!BufferAllocator::isValid(mi->getAllocationId())) {
continue;
}
// This instance doesn't need orphaning.
if (!mi->getUniformBuffer().isDirty() || !mAllocator.isLockedByGpu(mi->getAllocationId())) {
continue;
}
mAllocator.retire(mi->getAllocationId());
// If the space is already not sufficient, we don't need to give another try on allocation.
if (reallocationNeeded) {
mi->assignUboAllocation(mUbHandle, REALLOCATION_REQUIRED, 0);
continue;
}
auto [newId, newOffset] = mAllocator.allocate(mi->getUniformBuffer().getSize());
// Even if the newId is not valid, we assign it to the MI so that the following process knows
// this material instance was not allocated successfully. Then we can calculate the new
// required UBO size properly.
mi->assignUboAllocation(mUbHandle, newId, newOffset);
if (!BufferAllocator::isValid(newId)) {
reallocationNeeded = true;
}
}
return reallocationNeeded ? REALLOCATION_REQUIRED : SUCCESS;
}
void UboManager::allocateAllInstances(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances) {
for (const auto& [_, miList] : materialInstances) {
miList.forEach([this](FMaterialInstance* mi) {
if (!mi->isUsingUboBatching()) {
return;
}
auto [newId, newOffset] = mAllocator.allocate(mi->getUniformBuffer().getSize());
assert_invariant(BufferAllocator::isValid(newId));
mi->assignUboAllocation(mUbHandle, newId, newOffset);
});
void UboManager::allocateAllInstances() {
for (auto* mi: mManagedInstances) {
auto [newId, newOffset] = mAllocator.allocate(mi->getUniformBuffer().getSize());
assert_invariant(BufferAllocator::isValid(newId));
mi->assignUboAllocation(mUbHandle, newId, newOffset);
}
}
@@ -288,28 +288,19 @@ void UboManager::reallocate(DriverApi& driver, allocation_size_t requiredSize) {
BufferUsage::DYNAMIC | BufferUsage::SHARED_WRITE_BIT);
}
allocation_size_t UboManager::calculateRequiredSize(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances) {
BufferAllocator& allocator = mAllocator;
allocation_size_t UboManager::calculateRequiredSize() {
allocation_size_t newBufferSize = 0;
for (const auto& materialInstance: materialInstances) {
materialInstance.second.forEach([&newBufferSize, &allocator](const FMaterialInstance* mi) {
if (!mi->isUsingUboBatching()) {
return;
}
const AllocationId allocationId = mi->getAllocationId();
if (allocationId == BufferAllocator::REALLOCATION_REQUIRED) {
// For MIs whose parameters have been updated, aside from the slot it is being
// occupied by the GPU, we need to preserve an additional slot for it.
newBufferSize += 2 * allocator.alignUp(mi->getUniformBuffer().getSize());
} else {
newBufferSize += allocator.alignUp(mi->getUniformBuffer().getSize());
}
});
for (const auto* mi: mManagedInstances) {
const AllocationId allocationId = mi->getAllocationId();
if (allocationId == BufferAllocator::REALLOCATION_REQUIRED) {
// For MIs whose parameters have been updated, aside from the slot it is being
// occupied by the GPU, we need to preserve an additional slot for it.
newBufferSize += 2 * mAllocator.alignUp(mi->getUniformBuffer().getSize());
} else {
newBufferSize += mAllocator.alignUp(mi->getUniformBuffer().getSize());
}
}
return allocator.alignUp(newBufferSize * BUFFER_SIZE_GROWTH_MULTIPLIER);
return mAllocator.alignUp(newBufferSize * BUFFER_SIZE_GROWTH_MULTIPLIER);
}
} // namespace filament

View File

@@ -17,7 +17,6 @@
#ifndef TNT_FILAMENT_DETAILS_UBOMANAGER_H
#define TNT_FILAMENT_DETAILS_UBOMANAGER_H
#include "ResourceList.h"
#include "backend/DriverApiForward.h"
#include "details/BufferAllocator.h"
@@ -29,6 +28,8 @@
#include <unordered_set>
#include <vector>
class UboManagerTest;
namespace filament {
class FMaterial;
@@ -96,9 +97,8 @@ public:
// instances with modified uniforms).
// 3. Reallocating a larger shared UBO if the current one is insufficient.
// 4. Mapping the shared UBO into CPU-accessible memory to prepare for uniform data writes.
void beginFrame(backend::DriverApi& driver,
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances);
// Note that it must happen before committing all MIs.
void beginFrame(backend::DriverApi& driver);
// Unmap the buffer here
void finishBeginFrame(backend::DriverApi& driver);
@@ -106,23 +106,31 @@ public:
// Create a fence and associate it with a set of allocation ids.
// The gpuUseCount of these allocations will be incremented, and they will be decremented
// After the corresponding frame has been done.
void endFrame(backend::DriverApi& driver,
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances);
void endFrame(backend::DriverApi& driver);
void terminate(backend::DriverApi& driver);
void updateSlot(backend::DriverApi& driver, BufferAllocator::AllocationId id,
backend::BufferDescriptor bufferDescriptor) const;
// Call this when a material instance is no longer holding a slot. e.g. it is destroyed.
void retireSlot(BufferAllocator::AllocationId id);
// Call this to register a new material instance to UboManager.
void manageMaterialInstance(FMaterialInstance* instance);
// Call this when a material instance is destroyed.
void unmanageMaterialInstance(FMaterialInstance* materialInstance);
// Returns the size of the actual UBO. Note that when there's allocation failed, it will be
// reallocated to a bigger size at the next frame.
[[nodiscard]] BufferAllocator::allocation_size_t getTotalSize() const noexcept;
// For testing
[[nodiscard]] backend::MemoryMappedBufferHandle getMemoryMappedBufferHandle() const noexcept {
return mMemoryMappedBufferHandle;
}
private:
friend class ::UboManagerTest;
constexpr static float BUFFER_SIZE_GROWTH_MULTIPLIER = 1.5f;
enum AllocationResult {
@@ -134,23 +142,19 @@ private:
[[nodiscard]] BufferAllocator::allocation_size_t getAllocationOffset(
BufferAllocator::AllocationId id) const;
AllocationResult allocateOnDemand(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances);
AllocationResult allocateOnDemand();
void allocateAllInstances(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances);
void allocateAllInstances();
void reallocate(backend::DriverApi& driver, BufferAllocator::allocation_size_t requiredSize);
BufferAllocator::allocation_size_t calculateRequiredSize(
const std::unordered_map<const FMaterial*, ResourceList<FMaterialInstance>>&
materialInstances);
BufferAllocator::allocation_size_t calculateRequiredSize();
backend::Handle<backend::HwBufferObject> mUbHandle;
backend::MemoryMappedBufferHandle mMemoryMappedBufferHandle;
BufferAllocator::allocation_size_t mUboSize{};
std::unordered_set<FMaterialInstance*> mPendingInstances;
std::unordered_set<FMaterialInstance*> mManagedInstances;
FenceManager mFenceManager;
BufferAllocator mAllocator;

View File

@@ -285,53 +285,45 @@ FVertexBuffer::FVertexBuffer(FEngine& engine, const Builder& builder)
// calculate buffer sizes
size_t bufferSizes[MAX_VERTEX_BUFFER_COUNT] = {};
#pragma nounroll
for (size_t i = 0, n = mAttributes.size(); i < n; ++i) {
if (mDeclaredAttributes[i]) {
const uint32_t offset = mAttributes[i].offset;
const uint8_t stride = mAttributes[i].stride;
const uint8_t slot = mAttributes[i].buffer;
const size_t end = offset + mVertexCount * stride;
if (slot != Attribute::BUFFER_UNUSED) {
assert_invariant(slot < MAX_VERTEX_BUFFER_COUNT);
bufferSizes[slot] = std::max(bufferSizes[slot], end);
}
}
}
auto shouldCreateBuffer = [this](size_t attributeIndex) {
const uint8_t slot = mAttributes[attributeIndex].buffer;
return mDeclaredAttributes[attributeIndex] && slot != Attribute::BUFFER_UNUSED &&
!mBufferObjects[slot];
};
auto updateBufferSize = [&bufferSizes, this](size_t attributeIndex) {
const uint32_t offset = mAttributes[attributeIndex].offset;
const uint8_t stride = mAttributes[attributeIndex].stride;
const uint8_t slot = mAttributes[attributeIndex].buffer;
const size_t end = offset + mVertexCount * stride;
assert_invariant(slot < MAX_VERTEX_BUFFER_COUNT);
bufferSizes[slot] = std::max(bufferSizes[slot], end);
};
if (!mBufferObjectsEnabled) {
// If buffer objects are not enabled at the API level, then we create them internally.
#pragma nounroll
for (size_t index = 0; index < MAX_VERTEX_ATTRIBUTE_COUNT; ++index) {
size_t const i = mAttributes[index].buffer;
if (i != Attribute::BUFFER_UNUSED) {
assert_invariant(bufferSizes[i] > 0);
if (!mBufferObjects[i]) {
BufferObjectHandle const bo = driver.createBufferObject(bufferSizes[i],
BufferObjectBinding::VERTEX, BufferUsage::STATIC,
utils::ImmutableCString{ builder.getName() });
driver.setVertexBufferObject(mHandle, i, bo);
mBufferObjects[i] = bo;
}
for (size_t i = 0, n = mAttributes.size(); i < n; ++i) {
if (shouldCreateBuffer(i)) {
updateBufferSize(i);
}
}
} else {
// in advanced skinning mode, we manage the BONE_INDICES and BONE_WEIGHTS arrays ourselves,
// so we have to set the corresponding buffer objects.
if (mAdvancedSkinningEnabled) {
for (auto const index : { BONE_INDICES, BONE_WEIGHTS }) {
size_t const i = mAttributes[index].buffer;
assert_invariant(i != Attribute::BUFFER_UNUSED);
assert_invariant(bufferSizes[i] > 0);
if (!mBufferObjects[i]) {
BufferObjectHandle const bo = driver.createBufferObject(bufferSizes[i],
BufferObjectBinding::VERTEX, BufferUsage::STATIC,
utils::ImmutableCString{ builder.getName() });
driver.setVertexBufferObject(mHandle, i, bo);
mBufferObjects[i] = bo;
}
}
} else if (mAdvancedSkinningEnabled) {
// For advanced skinning mode, only relevant buffers (BONE_INDICES & BONE_WEIGHTS) are
// created. We manually populated the relevant attributes for those buffers above.
updateBufferSize(BONE_INDICES);
updateBufferSize(BONE_WEIGHTS);
}
// create buffers
for (size_t i = 0; i < MAX_VERTEX_BUFFER_COUNT; ++i) {
if (bufferSizes[i] == 0 || mBufferObjects[i]) {
continue;
}
BufferObjectHandle const bo = driver.createBufferObject(bufferSizes[i],
BufferObjectBinding::VERTEX, BufferUsage::STATIC,
utils::ImmutableCString{ builder.getName() });
driver.setVertexBufferObject(mHandle, i, bo);
mBufferObjects[i] = bo;
}
}

View File

@@ -50,14 +50,17 @@ if (TNT_DEV)
test_BufferAllocatorStress.cpp
test_CircularQueue.cpp
test_FenceManager.cpp
test_UboManager.cpp
filament_test_exposure.cpp
filament_rendering_test.cpp
filament_bimap_test.cpp
filament_framegraph_test.cpp
filament_test.cpp)
filament_test.cpp
${RESGEN_SOURCE})
target_link_libraries(test_${TARGET} PRIVATE filament gtest)
target_compile_options(test_${TARGET} PRIVATE ${COMPILER_FLAGS})
target_include_directories(test_${TARGET} PRIVATE ${RESOURCE_DIR})
set_target_properties(test_${TARGET} PROPERTIES FOLDER Tests)
add_executable(test_depth depth_test.cpp)

View File

@@ -0,0 +1,366 @@
/*
* Copyright (C) 2025 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "MockDriver.h"
#include "details/MaterialInstance.h"
#include "details/UboManager.h"
#include <private/backend/CommandBufferQueue.h>
#include <private/backend/CommandStream.h>
#include <private/backend/Driver.h>
#include "filament_test_resources.h"
namespace {

using namespace filament;
using namespace backend;

// gmock names used unqualified by the tests in this file. `_` and `Return` appear in the
// EXPECT_CALL(mMockDriver, getFenceStatus(_)).WillOnce(Return(...)) expectations, so they are
// declared here explicitly rather than relying on another header to bring them into scope.
using ::testing::_;
using ::testing::NiceMock;
using ::testing::Return;

// Shorthands for the BufferAllocator types the tests store and compare.
using AllocationId = BufferAllocator::AllocationId;
using allocation_size_t = BufferAllocator::allocation_size_t;

} // anonymous namespace
class UboManagerTest : public ::testing::Test {
protected:
static constexpr size_t MIN_COMMAND_BUFFERS_SIZE = 1 * 1024 * 1024;
static constexpr size_t COMMAND_BUFFERS_SIZE = 3 * MIN_COMMAND_BUFFERS_SIZE;
static constexpr BufferAllocator::allocation_size_t DEFAULT_SLOT_SIZE = 64;
static constexpr BufferAllocator::allocation_size_t DEFAULT_TOTAL_SIZE = 1024;
UboManagerTest()
: mCommandBufferQueue(MIN_COMMAND_BUFFERS_SIZE, COMMAND_BUFFERS_SIZE, false),
mCommandStream(mMockDriver, mCommandBufferQueue.getCircularBuffer()),
mDriverApi(mCommandStream),
// The constructor will call reallocate, which calls createBufferObject.
// MockDriver's default ...S() implementation returns an incrementing handle.
// So, the first handle will be 1.
mUboManager(mDriverApi, DEFAULT_SLOT_SIZE, DEFAULT_TOTAL_SIZE),
mPendingInstances(mUboManager.mPendingInstances),
mManagedInstances(mUboManager.mManagedInstances),
mUbHandle(mUboManager.mUbHandle),
mAllocator(mUboManager.mAllocator) {
mEngine = Engine::Builder()
.feature("material.enable_material_instance_uniform_batching", true)
.backend(Backend::NOOP)
.build();
mMaterial = Material::Builder()
.package(FILAMENT_TEST_RESOURCES_TEST_MATERIAL_DATA,
FILAMENT_TEST_RESOURCES_TEST_MATERIAL_SIZE)
.build(*mEngine);
}
void TearDown() override {
mEngine->destroy(mMaterial);
Engine::destroy(&mEngine);
}
// The engine is only for creating materials/material instances, we're not using the UboManager
// inside for testing.
Engine* mEngine = nullptr;
NiceMock<MockDriver> mMockDriver;
CommandBufferQueue mCommandBufferQueue;
CommandStream mCommandStream;
DriverApi& mDriverApi;
UboManager mUboManager;
Material const* mMaterial;
std::unordered_set<FMaterialInstance*>& mPendingInstances;
std::unordered_set<FMaterialInstance*>& mManagedInstances;
Handle<HwBufferObject>& mUbHandle;
BufferAllocator& mAllocator;
};
// Checks the state left behind by the fixture constructor: the manager reports the configured
// total size and holds a non-null buffer handle. nextFakeHandle == 2 is consistent with the
// fixture's constructor comment that the buffer created during construction gets handle 1.
TEST_F(UboManagerTest, InitialState) {
EXPECT_EQ(mUboManager.getTotalSize(), DEFAULT_TOTAL_SIZE);
EXPECT_EQ(mMockDriver.nextFakeHandle, 2);
EXPECT_NE(mUbHandle.getId(), HandleBase::nullid);
}
// Runs one full frame with a single managed instance and expects the buffer size to stay
// unchanged (no reallocation).
TEST_F(UboManagerTest, BeginFrameWithoutReallocate) {
    const allocation_size_t sizeBeforeFrame = mUboManager.getTotalSize();

    FMaterialInstance* const instance =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());
    EXPECT_EQ(instance->getAllocationId(), BufferAllocator::UNALLOCATED);
    ASSERT_TRUE(instance->isUsingUboBatching());

    // Managing an instance only queues it: it must show up in the pending set first.
    mUboManager.manageMaterialInstance(instance);
    EXPECT_TRUE(mPendingInstances.contains(instance));
    EXPECT_FALSE(mManagedInstances.contains(instance));

    mUboManager.beginFrame(mDriverApi);

    // beginFrame promotes the instance from the pending set to the managed set...
    EXPECT_FALSE(mPendingInstances.contains(instance));
    EXPECT_TRUE(mManagedInstances.contains(instance));
    // ...and gives it a valid allocation.
    EXPECT_TRUE(BufferAllocator::isValid(instance->getAllocationId()));

    // The buffer did not have to grow for this.
    EXPECT_EQ(mUboManager.getTotalSize(), sizeBeforeFrame);

    // The memory-mapped buffer handle is non-null between beginFrame and finishBeginFrame, and
    // null again afterwards.
    EXPECT_NE(mUboManager.getMemoryMappedBufferHandle().getId(), HandleBase::nullid);
    mUboManager.finishBeginFrame(mDriverApi);
    EXPECT_EQ(mUboManager.getMemoryMappedBufferHandle().getId(), HandleBase::nullid);

    mUboManager.endFrame(mDriverApi);
    EXPECT_TRUE(mAllocator.isLockedByGpu(instance->getAllocationId()));

    // The engine's own UboManager is not the one under test, so the instance has to be
    // unmanaged from ours explicitly.
    mUboManager.unmanageMaterialInstance(instance);
    EXPECT_FALSE(mPendingInstances.contains(instance));
    EXPECT_FALSE(mManagedInstances.contains(instance));

    mUboManager.terminate(mDriverApi);
    mEngine->destroy(instance);
}
// Manages one more instance than the initial buffer has DEFAULT_SLOT_SIZE slots and expects
// beginFrame to replace the buffer with a larger one.
TEST_F(UboManagerTest, BeginFrameWithReallocate) {
    const allocation_size_t sizeBeforeFrame = mUboManager.getTotalSize();
    const Handle<HwBufferObject> handleBeforeFrame = mUbHandle;

    // Enough instances to force a reallocation.
    constexpr size_t instanceCount = (DEFAULT_TOTAL_SIZE / DEFAULT_SLOT_SIZE) + 1;
    std::vector<FMaterialInstance*> created;
    created.reserve(instanceCount);
    while (created.size() < instanceCount) {
        FMaterialInstance* const mi =
                static_cast<FMaterialInstance*>(mMaterial->createInstance());
        created.push_back(mi);
        mUboManager.manageMaterialInstance(mi);
    }

    // Nothing has been promoted yet: everything is still pending.
    EXPECT_EQ(mPendingInstances.size(), instanceCount);
    EXPECT_TRUE(mManagedInstances.empty());

    mUboManager.beginFrame(mDriverApi);

    // The buffer was replaced by a bigger one.
    EXPECT_NE(mUbHandle.getId(), handleBeforeFrame.getId());
    EXPECT_GT(mUboManager.getTotalSize(), sizeBeforeFrame);

    // Every instance is now managed and holds a valid allocation.
    EXPECT_TRUE(mPendingInstances.empty());
    EXPECT_EQ(mManagedInstances.size(), instanceCount);
    for (FMaterialInstance* const mi : created) {
        EXPECT_TRUE(mManagedInstances.contains(mi));
        EXPECT_TRUE(BufferAllocator::isValid(mi->getAllocationId()));
    }

    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.terminate(mDriverApi);

    // The engine's own UboManager is not the one under test, so each instance has to be
    // unmanaged from ours explicitly before it is destroyed.
    for (FMaterialInstance* const mi : created) {
        mUboManager.unmanageMaterialInstance(mi);
        mEngine->destroy(mi);
    }
}
// A slot released by an unmanaged instance must be handed to a new instance once the fence
// guarding it is reported as satisfied.
TEST_F(UboManagerTest, RecycleSlot) {
    FMaterialInstance* const first =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());
    mUboManager.manageMaterialInstance(first);

    // Frame 1: the first instance receives its allocation.
    mUboManager.beginFrame(mDriverApi);
    const AllocationId firstId = first->getAllocationId();
    const allocation_size_t firstOffset = mAllocator.getAllocationOffset(firstId);
    EXPECT_TRUE(BufferAllocator::isValid(firstId));
    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.endFrame(mDriverApi); // Locks the first instance's allocation.

    // Unmanaging retires the slot, but it is not released yet.
    mUboManager.unmanageMaterialInstance(first);
    EXPECT_TRUE(mAllocator.isLockedByGpu(firstId));

    // Frame 2: the slot is still locked by the GPU. The fence from frame 1 is queried once and
    // the mock answers TIMEOUT_EXPIRED, so nothing is reclaimed.
    EXPECT_CALL(mMockDriver, getFenceStatus(_)).WillOnce(Return(FenceStatus::TIMEOUT_EXPIRED));
    mUboManager.beginFrame(mDriverApi);
    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.endFrame(mDriverApi);

    // Frame 3: the mock now reports the frame 1 fence as signaled, so the first instance's
    // slot can be reclaimed.
    EXPECT_CALL(mMockDriver, getFenceStatus(_)).WillOnce(Return(FenceStatus::CONDITION_SATISFIED));
    FMaterialInstance* const second =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());
    mUboManager.manageMaterialInstance(second);
    mUboManager.beginFrame(mDriverApi);

    // The second instance gets a valid allocation at the offset the first one used.
    EXPECT_TRUE(BufferAllocator::isValid(second->getAllocationId()));
    EXPECT_EQ(mAllocator.getAllocationOffset(second->getAllocationId()), firstOffset);

    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.unmanageMaterialInstance(second);
    mUboManager.terminate(mDriverApi);
    mEngine->destroy(first);
    mEngine->destroy(second);
}
// Invalidating the uniform buffer of an instance whose slot is still locked must move the
// instance to a new allocation instead of reusing the locked one.
TEST_F(UboManagerTest, OrphanSlot) {
    FMaterialInstance* const instance =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());
    mUboManager.manageMaterialInstance(instance);

    // Frame 1: the instance receives its first allocation.
    mUboManager.beginFrame(mDriverApi);
    const AllocationId firstId = instance->getAllocationId();
    EXPECT_TRUE(BufferAllocator::isValid(firstId));
    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.endFrame(mDriverApi); // Locks firstId.

    // Frame 2: dirty the instance, then start a new frame while the mock still reports the
    // frame 1 fence as not signaled. This should trigger orphaning.
    instance->getUniformBuffer().invalidate();
    EXPECT_CALL(mMockDriver, getFenceStatus(_)).WillOnce(Return(FenceStatus::TIMEOUT_EXPIRED));
    mUboManager.beginFrame(mDriverApi);
    const AllocationId secondId = instance->getAllocationId();
    EXPECT_TRUE(BufferAllocator::isValid(secondId));
    EXPECT_NE(firstId, secondId); // A different allocation was handed out.
    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.endFrame(mDriverApi); // Locks secondId.

    // Frame 3: the fence for firstId should now be signaled.
    EXPECT_CALL(mMockDriver, getFenceStatus(_))
            .WillOnce(Return(FenceStatus::TIMEOUT_EXPIRED))      // For secondId's fence
            .WillOnce(Return(FenceStatus::CONDITION_SATISFIED)); // For firstId's fence
    mUboManager.beginFrame(mDriverApi);
    mUboManager.finishBeginFrame(mDriverApi);

    mUboManager.unmanageMaterialInstance(instance);
    mUboManager.terminate(mDriverApi);
    mEngine->destroy(instance);
}
// Managing the same instance twice must leave exactly one entry in the pending set.
TEST_F(UboManagerTest, DoubleManage) {
    auto mi1 = static_cast<FMaterialInstance*>(mMaterial->createInstance());

    // size() is unsigned; the expected values use an unsigned literal so EXPECT_EQ does not
    // compare a size_t against a signed int.
    mUboManager.manageMaterialInstance(mi1);
    EXPECT_EQ(mPendingInstances.size(), 1u);

    // Managing the same instance again should be a no-op.
    mUboManager.manageMaterialInstance(mi1);
    EXPECT_EQ(mPendingInstances.size(), 1u);

    mUboManager.terminate(mDriverApi);
    mEngine->destroy(mi1);
}
// An instance that is managed and then unmanaged before any frame starts must never be
// tracked or given an allocation.
TEST_F(UboManagerTest, ManageAndUnmanageBeforeBeginFrame) {
    FMaterialInstance* const instance =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());

    mUboManager.manageMaterialInstance(instance);
    EXPECT_TRUE(mPendingInstances.contains(instance));

    mUboManager.unmanageMaterialInstance(instance);
    EXPECT_FALSE(mPendingInstances.contains(instance));

    // Starting a frame must not bring the instance back into either set.
    mUboManager.beginFrame(mDriverApi);
    EXPECT_FALSE(mPendingInstances.contains(instance));
    EXPECT_FALSE(mManagedInstances.contains(instance));
    EXPECT_EQ(instance->getAllocationId(), BufferAllocator::UNALLOCATED);

    mUboManager.terminate(mDriverApi);
    mEngine->destroy(instance);
}
// Unmanaging an instance the manager has never seen must leave both sets without it.
TEST_F(UboManagerTest, UnmanageUnmanaged) {
    FMaterialInstance* const stranger =
            static_cast<FMaterialInstance*>(mMaterial->createInstance());

    // The instance was never passed to manageMaterialInstance; this call should be harmless.
    mUboManager.unmanageMaterialInstance(stranger);
    EXPECT_FALSE(mPendingInstances.contains(stranger));
    EXPECT_FALSE(mManagedInstances.contains(stranger));

    mUboManager.terminate(mDriverApi);
    mEngine->destroy(stranger);
}
// After a complete frame, the allocation of every managed instance must be reported as locked
// by the GPU.
TEST_F(UboManagerTest, AllAllocationsLockedAfterEndFrame) {
    constexpr size_t instanceCount = 5;
    std::vector<FMaterialInstance*> created;
    created.reserve(instanceCount);
    while (created.size() < instanceCount) {
        FMaterialInstance* const mi =
                static_cast<FMaterialInstance*>(mMaterial->createInstance());
        created.push_back(mi);
        mUboManager.manageMaterialInstance(mi);
    }

    // One full frame.
    mUboManager.beginFrame(mDriverApi);
    mUboManager.finishBeginFrame(mDriverApi);
    mUboManager.endFrame(mDriverApi);

    for (FMaterialInstance const* const mi : created) {
        EXPECT_TRUE(mAllocator.isLockedByGpu(mi->getAllocationId()));
    }

    mUboManager.terminate(mDriverApi);

    // The engine's own UboManager is not the one under test, so each instance has to be
    // unmanaged from ours explicitly before it is destroyed.
    for (FMaterialInstance* const mi : created) {
        mUboManager.unmanageMaterialInstance(mi);
        mEngine->destroy(mi);
    }
}
// Same as AllAllocationsLockedAfterEndFrame, except that one instance in the middle is given a
// non-valid allocation id before endFrame; the remaining instances must still end up locked.
TEST_F(UboManagerTest, AllAllocationsLockedAfterEndFrameWithInvalidIdInBetween) {
    constexpr size_t instanceCount = 5;
    std::vector<FMaterialInstance*> created;
    created.reserve(instanceCount);
    while (created.size() < instanceCount) {
        FMaterialInstance* const mi =
                static_cast<FMaterialInstance*>(mMaterial->createInstance());
        created.push_back(mi);
        mUboManager.manageMaterialInstance(mi);
    }

    mUboManager.beginFrame(mDriverApi);
    mUboManager.finishBeginFrame(mDriverApi);

    // It should rarely happen, but we want to make sure all other instances are locked properly.
    created[2]->assignUboAllocation(mUbHandle, BufferAllocator::REALLOCATION_REQUIRED, 0);

    mUboManager.endFrame(mDriverApi);

    // Only instances that still hold a valid allocation id are expected to be locked.
    for (FMaterialInstance const* const mi : created) {
        if (!BufferAllocator::isValid(mi->getAllocationId())) {
            continue;
        }
        EXPECT_TRUE(mAllocator.isLockedByGpu(mi->getAllocationId()));
    }

    mUboManager.terminate(mDriverApi);

    // The engine's own UboManager is not the one under test, so each instance has to be
    // unmanaged from ours explicitly before it is destroyed.
    for (FMaterialInstance* const mi : created) {
        mUboManager.unmanageMaterialInstance(mi);
        mEngine->destroy(mi);
    }
}
// TODO: Add more tests for the beginFrame flow

Binary file not shown.

View File

@@ -1,5 +1,5 @@
---
Language: Cpp
BasedOnStyle: Google
PointerAlignment: Left
...

37
third_party/benchmark/.clang-tidy vendored Normal file
View File

@@ -0,0 +1,37 @@
---
Checks: >
abseil-*,
bugprone-*,
clang-analyzer-*,
cppcoreguidelines-*,
google-*,
misc-*,
performance-*,
readability-*,
-clang-analyzer-deadcode*,
-clang-analyzer-optin*,
-readability-identifier-length
WarningsAsErrors: ''
HeaderFilterRegex: ''
FormatStyle: none
CheckOptions:
llvm-else-after-return.WarnOnConditionVariables: 'false'
modernize-loop-convert.MinConfidence: reasonable
modernize-replace-auto-ptr.IncludeStyle: llvm
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
google-readability-namespace-comments.ShortNamespaceLines: '10'
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
google-readability-braces-around-statements.ShortStatementLines: '1'
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
google-readability-namespace-comments.SpacesBeforeComments: '2'
modernize-loop-convert.MaxCopySize: '16'
modernize-pass-by-value.IncludeStyle: llvm
modernize-use-nullptr.NullMacros: 'NULL'
llvm-qualified-auto.AddConstToQualified: 'false'
modernize-loop-convert.NamingStyle: CamelCase
llvm-else-after-return.WarnOnUnfixable: 'false'
google-readability-function-size.StatementThreshold: '800'
...

View File

@@ -0,0 +1 @@
.*third_party/.*

View File

@@ -0,0 +1,32 @@
---
name: Bug report
about: Create a report to help us improve
title: "[BUG]"
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**System**
Which OS, compiler, and compiler version are you using:
- OS:
- Compiler and version:
**To reproduce**
Steps to reproduce the behavior:
1. sync to commit ...
2. cmake/bazel...
3. make ...
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.

View File

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: "[FR]"
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@@ -0,0 +1,12 @@
# Installs a Bazel release binary into $HOME/bin when `bazel version` does not succeed.
# When Bazel already works, the script exits 0 without downloading anything.
if ! bazel version; then
  bazel_version="8.2.0"
  bazel_bin="$HOME/bin/bazel"

  # The download URL uses "arm64" where `uname -m` prints "aarch64".
  arch=$(uname -m)
  if [ "$arch" = "aarch64" ]; then
    arch="arm64"
  fi

  echo "Downloading $arch Bazel binary from GitHub releases."
  # --fail makes curl exit non-zero on an HTTP error instead of saving the server's error page
  # as the "binary", which would then be marked executable below.
  if ! curl --fail -L -o "$bazel_bin" --create-dirs \
      "https://github.com/bazelbuild/bazel/releases/download/${bazel_version}/bazel-${bazel_version}-linux-${arch}"; then
    echo "Failed to download Bazel ${bazel_version} for ${arch}." >&2
    exit 1
  fi
  chmod +x "$bazel_bin"
else
  # Bazel is installed for the correct architecture
  exit 0
fi

35
third_party/benchmark/.github/libcxx-setup.sh vendored Executable file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
# Fetches the LLVM sources needed for libc++/libc++abi and builds them in ./llvm-build.
# Reads CC, CXX and LIBCXX_SANITIZER from the environment; BUILD_32_BITS is optional and
# defaults to OFF. Any failing command aborts the script (set -e).
set -e
# Checkout LLVM sources
# The clone is shallow, blob-less and without a working tree; the sparse-checkout calls then
# restrict the checkout of tag llvmorg-19.1.6 to the directories listed below.
git clone --filter=blob:none --depth=1 --branch llvmorg-19.1.6 --no-checkout https://github.com/llvm/llvm-project.git llvm-project
cd llvm-project
git sparse-checkout set --cone
git checkout llvmorg-19.1.6
git sparse-checkout set cmake llvm/cmake runtimes libcxx libcxxabi
cd ..
## Setup libc++ options
# Default BUILD_32_BITS to OFF when the caller did not set it.
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
# NOTE(review): the heading above says "unstable ABI", but the configure command below passes
# -DLIBCXX_ABI_UNSTABLE=OFF. One of the two is out of date -- confirm which is intended.
# NOTE(review): only the cxx and cxxabi targets are built here; no install step is run in this
# script even though CMAKE_INSTALL_PREFIX is set.
mkdir llvm-build && cd llvm-build
cmake -GNinja \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=OFF \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
-DLIBCXXABI_USE_LLVM_UNWINDER=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
-DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi' \
../llvm-project/runtimes/
cmake --build . -- cxx cxxabi
cd ..

View File

@@ -0,0 +1,37 @@
name: bazel
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
build_and_test_default:
name: bazel.${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- name: mount bazel cache
uses: actions/cache@v4
env:
cache-name: bazel-cache
with:
path: "~/.cache/bazel"
key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }}
restore-keys: |
${{ env.cache-name }}-${{ matrix.os }}-main
- name: build
run: |
bazel build //:benchmark //:benchmark_main //test/...
- name: test
run: |
bazel test --test_output=all //test/...

View File

@@ -0,0 +1,49 @@
name: build-and-test-min-cmake
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: ${{ matrix.os }}.min-cmake
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- uses: lukka/get-cmake@latest
with:
cmakeVersion: 3.13.0
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: setup cmake initial cache
run: touch compiler-cache.cmake
- name: configure cmake
env:
CXX: ${{ matrix.compiler }}
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake -C ${{ github.workspace }}/compiler-cache.cmake
$GITHUB_WORKSPACE
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build .

View File

@@ -0,0 +1,54 @@
name: build-and-test-perfcounters
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
name: ${{ matrix.os }}.${{ matrix.build_type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
build_type: ['Release', 'Debug']
steps:
- uses: actions/checkout@v4
- name: install libpfm
run: |
sudo apt update
sudo apt -y install libpfm4-dev
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_LIBPFM=1
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
# Skip testing, for now. It seems perf_event_open does not succeed on the
# hosting machine, very likely a permissions issue.
# TODO(mtrofin): Enable test.
# - name: test
# shell: bash
# working-directory: ${{ runner.workspace }}/_build
# run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure

View File

@@ -0,0 +1,151 @@
name: build-and-test
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
# TODO: add 32-bit builds (g++ and clang++) for ubuntu
# (requires g++-multilib and libc6:i386)
# TODO: add coverage build (requires lcov)
# TODO: add clang + libc++ builds for ubuntu
job:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-24.04, ubuntu-22.04, ubuntu-24.04-arm, macos-latest]
build_type: ['Release', 'Debug']
compiler: ['g++', 'clang++']
lib: ['shared', 'static']
steps:
- name: Install dependencies (macos)
if: runner.os == 'macOS'
run: brew install ninja
- uses: actions/checkout@v4
- name: build
uses: threeal/cmake-action@v2.1.0
with:
build-dir: ${{ runner.workspace }}/_build
cxx-compiler: ${{ matrix.compiler }}
options: |
BENCHMARK_DOWNLOAD_DEPENDENCIES=ON
BUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
CMAKE_BUILD_TYPE=${{ matrix.build_type }}
CMAKE_CXX_COMPILER=${{ matrix.compiler }}
CMAKE_CXX_VISIBILITY_PRESET=hidden
CMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV
msvc:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }}
runs-on: ${{ matrix.os }}
defaults:
run:
shell: powershell
strategy:
fail-fast: false
matrix:
msvc:
- VS-16-2019
- VS-17-2022
build_type:
- Debug
- Release
lib:
- shared
- static
include:
- msvc: VS-16-2019
os: windows-2019
generator: 'Visual Studio 16 2019'
- msvc: VS-17-2022
os: windows-2022
generator: 'Visual Studio 17 2022'
steps:
- uses: actions/checkout@v4
- uses: lukka/get-cmake@latest
- name: configure cmake
run: >
cmake -S . -B ${{ runner.workspace }}/_build/
-G "${{ matrix.generator }}"
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build ${{ runner.workspace }}/_build/ --config ${{ matrix.build_type }}
- name: test
run: ctest --test-dir ${{ runner.workspace }}/_build/ -C ${{ matrix.build_type }} -VV
msys2:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msys2.msystem }}
runs-on: ${{ matrix.os }}
defaults:
run:
shell: msys2 {0}
strategy:
fail-fast: false
matrix:
os: [ windows-latest ]
msys2:
- { msystem: MINGW64, arch: x86_64, family: GNU, compiler: g++ }
- { msystem: CLANG64, arch: x86_64, family: LLVM, compiler: clang++ }
- { msystem: UCRT64, arch: x86_64, family: GNU, compiler: g++ }
build_type:
- Debug
- Release
lib:
- shared
- static
steps:
- name: setup msys2
uses: msys2/setup-msys2@v2
with:
cache: false
msystem: ${{ matrix.msys2.msystem }}
update: true
install: >-
git
base-devel
pacboy: >-
gcc:p
clang:p
cmake:p
ninja:p
- uses: actions/checkout@v4
# NOTE: we can't use cmake actions here as we need to do everything in msys2 shell.
- name: configure cmake
env:
CXX: ${{ matrix.msys2.compiler }}
run: >
cmake -S . -B _build/
-GNinja
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build _build/ --config ${{ matrix.build_type }}
- name: test
working-directory: _build
run: ctest -C ${{ matrix.build_type }} -VV

View File

@@ -0,0 +1,19 @@
name: clang-format-lint
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: check-clang-format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: DoozyX/clang-format-lint-action@v0.18.2
with:
source: './include/benchmark ./src ./test'
clangFormatVersion: 18

View File

@@ -0,0 +1,41 @@
name: clang-tidy
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: run-clang-tidy
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
- name: install clang-tidy
run: sudo apt update && sudo apt -y install clang-tidy
- name: create build environment
run: cmake -E make_directory ${{ github.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ github.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=clang
-DCMAKE_CXX_COMPILER=clang++
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DGTEST_COMPILE_COMMANDS=OFF
- name: run
shell: bash
working-directory: ${{ github.workspace }}/_build
run: run-clang-tidy -config-file=$GITHUB_WORKSPACE/.clang-tidy

View File

@@ -0,0 +1,31 @@
name: doxygen
on:
push:
branches: [main]
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
build-and-deploy:
name: Build HTML documentation
runs-on: ubuntu-latest
steps:
- name: Fetching sources
uses: actions/checkout@v4
- name: Installing build dependencies
run: |
sudo apt update
sudo apt install doxygen gcc git
- name: Creating build directory
run: mkdir build
- name: Building HTML documentation with Doxygen
run: |
cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON
cmake --build build --target benchmark_doxygen

View File

@@ -0,0 +1,41 @@
name: python + Bazel pre-commit checks
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
pre-commit:
runs-on: ubuntu-latest
env:
MYPY_CACHE_DIR: "${{ github.workspace }}/.cache/mypy"
RUFF_CACHE_DIR: "${{ github.workspace }}/.cache/ruff"
PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pre-commit"
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.11
cache: pip
cache-dependency-path: pyproject.toml
- name: Install dependencies
run: python -m pip install ".[dev]"
- name: Cache pre-commit tools
uses: actions/cache@v4
with:
path: |
${{ env.MYPY_CACHE_DIR }}
${{ env.RUFF_CACHE_DIR }}
${{ env.PRE_COMMIT_HOME }}
key: ${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}-linter-cache
- name: Run pre-commit checks
run: pre-commit run --all-files --verbose --show-diff-on-failure

View File

@@ -0,0 +1,97 @@
name: sanitizer
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
UBSAN_OPTIONS: "print_stacktrace=1"
jobs:
job:
name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
build_type: ['Debug', 'RelWithDebInfo']
sanitizer: ['asan', 'ubsan', 'tsan', 'msan']
steps:
- uses: actions/checkout@v4
- name: configure msan env
if: matrix.sanitizer == 'msan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV
- name: configure ubsan env
if: matrix.sanitizer == 'ubsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV
- name: configure asan env
if: matrix.sanitizer == 'asan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV
- name: configure tsan env
if: matrix.sanitizer == 'tsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV
- name: fine-tune asan options
# in asan we get an error from std::regex. ignore it.
if: matrix.sanitizer == 'asan'
run: |
echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV
- name: setup clang
uses: egor-tensin/setup-clang@v1
with:
version: latest
platform: x64
- name: configure clang
run: |
echo "CC=cc" >> $GITHUB_ENV
echo "CXX=c++" >> $GITHUB_ENV
- name: build libc++ (non-asan)
if: matrix.sanitizer != 'asan'
run: |
"${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -I${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
VERBOSE=1
cmake -GNinja $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=${{ env.CC }}
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
-DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}"
-DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}"
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV

View File

@@ -0,0 +1,33 @@
name: test-bindings
on:
push:
branches: [main]
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
python_bindings:
name: Test GBM Python ${{ matrix.python-version }} bindings on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest, windows-latest ]
python-version: [ "3.10", "3.11", "3.12", "3.13" ]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install GBM Python bindings on ${{ matrix.os }}
run: python -m pip install .
- name: Run example on ${{ matrix.os }} under Python ${{ matrix.python-version }}
run: python bindings/python/google_benchmark/example.py

View File

@@ -0,0 +1,83 @@
name: Build and upload Python wheels
on:
workflow_dispatch:
release:
types:
- published
env:
CMAKE_GENERATOR: Ninja
jobs:
build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- run: python -m pip install build
- name: Build sdist
run: python -m build --sdist
- uses: actions/upload-artifact@v4
with:
name: dist-sdist
path: dist/*.tar.gz
build_wheels:
name: Build Google Benchmark wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, windows-latest]
steps:
- name: Check out Google Benchmark
uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
name: Install Python 3.12
with:
python-version: "3.12"
- run: pip install --upgrade pip uv
- name: Build wheels on ${{ matrix.os }} using cibuildwheel
uses: pypa/cibuildwheel@v2.23.2
env:
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_BUILD_FRONTEND: "build[uv]"
CIBW_SKIP: "*-musllinux_*"
CIBW_ARCHS: auto64
CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
# Grab the rootless Bazel installation inside the container.
CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin
CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py
# unused by Bazel, but needed explicitly by delocate on MacOS.
MACOSX_DEPLOYMENT_TARGET: "10.14"
- name: Upload Google Benchmark ${{ matrix.os }} wheels
uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}
path: wheelhouse/*.whl
pypi_upload:
name: Publish google-benchmark wheels to PyPI
needs: [build_sdist, build_wheels]
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- uses: actions/download-artifact@v4
with:
path: dist
pattern: dist-*
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1

View File

@@ -8,8 +8,10 @@
!/cmake/*.cmake
!/test/AssemblyTests.cmake
*~
*.swp
*.pyc
__pycache__
.DS_Store
# lcov
*.lcov
@@ -44,6 +46,7 @@ rules.ninja
# bazel output symlinks.
bazel-*
MODULE.bazel.lock
# out-of-source build top-level folders.
build/
@@ -56,3 +59,10 @@ build*/
# Visual Studio 2015/2017 cache/options directory
.vs/
CMakeSettings.json
# Visual Studio Code cache/options directory
.vscode/
# Python build stuff
dist/
*.egg-info*

View File

@@ -0,0 +1,18 @@
repos:
- repo: https://github.com/keith/pre-commit-buildifier
rev: 8.0.3
hooks:
- id: buildifier
- id: buildifier-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
hooks:
- id: mypy
types_or: [ python, pyi ]
args: [ "--ignore-missing-imports", "--scripts-are-modules" ]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.8
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- id: ruff-format

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Install a newer CMake version
curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
chmod +x install-cmake.sh
sudo ./install-cmake.sh --prefix=/usr/local --skip-license
# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
# Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=ON \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
../llvm-source
make cxx -j2
sudo make install-cxxabi install-cxx
cd ../

View File

@@ -1,199 +0,0 @@
sudo: required
dist: trusty
language: cpp
env:
global:
- /usr/local/bin:$PATH
matrix:
include:
- compiler: gcc
addons:
apt:
packages:
- lcov
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
- compiler: gcc
addons:
apt:
packages:
- g++-multilib
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug BUILD_32_BITS=ON
- compiler: gcc
addons:
apt:
packages:
- g++-multilib
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release BUILD_32_BITS=ON
- compiler: gcc
env:
- INSTALL_GCC6_FROM_PPA=1
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
# Clang w/ libc++
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1
- EXTRA_FLAGS="-stdlib=libc++"
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release
- LIBCXX_BUILD=1
- EXTRA_FLAGS="-stdlib=libc++"
# Clang w/ 32bit libc++
- compiler: clang
addons:
apt:
packages:
- clang-3.8
- g++-multilib
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-stdlib=libc++ -m32"
# Clang w/ 32bit libc++
- compiler: clang
addons:
apt:
packages:
- clang-3.8
- g++-multilib
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release
- LIBCXX_BUILD=1
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-stdlib=libc++ -m32"
# Clang w/ libc++, ASAN, UBSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address"
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=undefined,address -fno-sanitize-recover=all"
- UBSAN_OPTIONS=print_stacktrace=1
# Clang w/ libc++ and MSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
# Clang w/ libc++ and TSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Debug
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Release
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Release BUILD_32_BITS=ON
- os: osx
osx_image: xcode8.3
compiler: gcc
env:
- COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
before_script:
- if [ -n "${LIBCXX_BUILD}" ]; then
source .travis-libcxx-setup.sh;
fi
- if [ -n "${ENABLE_SANITIZER}" ]; then
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";
else
export EXTRA_OPTIONS="";
fi
- mkdir -p build && cd build
before_install:
- if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build;
fi
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test";
sudo apt-get update --option Acquire::Retries=100 --option Acquire::http::Timeout="60";
fi
install:
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
travis_wait sudo -E apt-get -yq --no-install-suggests --no-install-recommends install g++-6;
fi
- if [ "${TRAVIS_OS_NAME}" == "linux" -a "${BUILD_32_BITS}" == "OFF" ]; then
travis_wait sudo -E apt-get -y --no-install-suggests --no-install-recommends install llvm-3.9-tools;
sudo cp /usr/lib/llvm-3.9/bin/FileCheck /usr/local/bin/;
fi
- if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
PATH=~/.local/bin:${PATH};
pip install --user --upgrade pip;
travis_wait pip install --user cpp-coveralls;
fi
- if [ "${C_COMPILER}" == "gcc-7" -a "${TRAVIS_OS_NAME}" == "osx" ]; then
rm -f /usr/local/include/c++;
brew update;
travis_wait brew install gcc@7;
fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
sudo apt-get update -qq;
sudo apt-get install -qq unzip;
wget https://github.com/bazelbuild/bazel/releases/download/0.10.1/bazel-0.10.1-installer-linux-x86_64.sh --output-document bazel-installer.sh;
travis_wait sudo bash bazel-installer.sh;
fi
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
curl -L -o bazel-installer.sh https://github.com/bazelbuild/bazel/releases/download/0.10.1/bazel-0.10.1-installer-darwin-x86_64.sh;
travis_wait sudo bash bazel-installer.sh;
fi
script:
- cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ${EXTRA_OPTIONS} ..
- make
- ctest -C ${BUILD_TYPE} --output-on-failure
- bazel test -c dbg --define google_benchmark.have_regex=posix --announce_rc --verbose_failures --test_output=errors --keep_going //test/...
after_success:
- if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .;
fi

View File

@@ -1,25 +1,30 @@
import os
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
flags = [
'-Wall',
'-Werror',
'-pedantic-errors',
'-std=c++0x',
'-fno-strict-aliasing',
'-O3',
'-DNDEBUG',
# ...and the same thing goes for the magic -x option which specifies the
# language that the files to be compiled are written in. This is mostly
# relevant for c++ headers.
# For a C project, you would set this to 'c' instead of 'c++'.
'-x', 'c++',
'-I', 'include',
'-isystem', '/usr/include',
'-isystem', '/usr/local/include',
"-Wall",
"-Werror",
"-pedantic-errors",
"-std=c++0x",
"-fno-strict-aliasing",
"-O3",
"-DNDEBUG",
# ...and the same thing goes for the magic -x option which specifies the
# language that the files to be compiled are written in. This is mostly
# relevant for c++ headers.
# For a C project, you would set this to 'c' instead of 'c++'.
"-x",
"c++",
"-I",
"include",
"-isystem",
"/usr/include",
"-isystem",
"/usr/local/include",
]
@@ -29,87 +34,87 @@ flags = [
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags. Notice that YCM itself uses that approach.
compilation_database_folder = ''
compilation_database_folder = ""
if os.path.exists( compilation_database_folder ):
database = ycm_core.CompilationDatabase( compilation_database_folder )
if os.path.exists(compilation_database_folder):
database = ycm_core.CompilationDatabase(compilation_database_folder)
else:
database = None
database = None
SOURCE_EXTENSIONS = [".cc"]
SOURCE_EXTENSIONS = [ '.cc' ]
def DirectoryOfThisScript():
return os.path.dirname( os.path.abspath( __file__ ) )
return os.path.dirname(os.path.abspath(__file__))
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
if not working_directory:
return list( flags )
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
if not working_directory:
return list(flags)
new_flags = []
make_next_absolute = False
path_flags = ["-isystem", "-I", "-iquote", "--sysroot="]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
new_flag = os.path.join( working_directory, flag )
if make_next_absolute:
make_next_absolute = False
if not flag.startswith("/"):
new_flag = os.path.join(working_directory, flag)
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith( path_flag ):
path = flag[ len( path_flag ): ]
new_flag = path_flag + os.path.join( working_directory, path )
break
if flag.startswith(path_flag):
path = flag[len(path_flag) :]
new_flag = path_flag + os.path.join(working_directory, path)
break
if new_flag:
new_flags.append( new_flag )
return new_flags
if new_flag:
new_flags.append(new_flag)
return new_flags
def IsHeaderFile( filename ):
extension = os.path.splitext( filename )[ 1 ]
return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
def IsHeaderFile(filename):
extension = os.path.splitext(filename)[1]
return extension in [".h", ".hxx", ".hpp", ".hh"]
def GetCompilationInfoForFile( filename ):
# The compilation_commands.json file generated by CMake does not have entries
# for header files. So we do our best by asking the db for flags for a
# corresponding source file, if any. If one exists, the flags for that file
# should be good enough.
if IsHeaderFile( filename ):
basename = os.path.splitext( filename )[ 0 ]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists( replacement_file ):
compilation_info = database.GetCompilationInfoForFile(
replacement_file )
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile( filename )
def GetCompilationInfoForFile(filename):
# The compile_commands.json file generated by CMake does not have
# entries for header files. So we do our best by asking the db for flags for
# a corresponding source file, if any. If one exists, the flags for that
# file should be good enough.
if IsHeaderFile(filename):
basename = os.path.splitext(filename)[0]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists(replacement_file):
compilation_info = database.GetCompilationInfoForFile(
replacement_file
)
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile(filename)
def FlagsForFile( filename, **kwargs ):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile( filename )
if not compilation_info:
return None
def FlagsForFile(filename, **kwargs):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile(filename)
if not compilation_info:
return None
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_ )
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_,
)
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to)
return {
'flags': final_flags,
'do_cache': True
}
return {"flags": final_flags, "do_cache": True}

View File

@@ -9,40 +9,64 @@
# Please keep the list sorted.
Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steeleal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Carto
Cezary Skrzyński <czars1988@gmail.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Daniel Harvey <danielharvey458@gmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dirac Research
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergely Meszaros <maetveis@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Paul Redmond <paul.redmond@gmail.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shapr3D <google-contributors@shapr3d.com>
Shuo Chen <chenshuo@chenshuo.com>
Staffan Tjernstrom <staffantj@gmail.com>
Steinar H. Gunderson <sgunderson@bigfoot.com>
Stripe, Inc.
Tobias Schmidt <tobias.schmidt@in.tum.de>
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>

View File

@@ -1,9 +1,38 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
licenses(["notice"])
COPTS = [
"-pedantic",
"-pedantic-errors",
"-std=c++17",
"-Wall",
"-Wconversion",
"-Wextra",
"-Wshadow",
# "-Wshorten-64-to-32",
"-Wfloat-equal",
"-fstrict-aliasing",
## assert() is used a lot in tests upstream; it may be optimised out, leading to
## unused-variable warnings.
"-Wno-unused-variable",
"-Werror=old-style-cast",
]
MSVC_COPTS = [
"/std:c++17",
]
config_setting(
name = "windows",
values = {
"cpu": "x64_windows",
constraint_values = ["@platforms//os:windows"],
visibility = [":__subpackages__"],
)
config_setting(
name = "perfcounters",
define_values = {
"pfm": "1",
},
visibility = [":__subpackages__"],
)
@@ -17,20 +46,51 @@ cc_library(
],
exclude = ["src/benchmark_main.cc"],
),
hdrs = ["include/benchmark/benchmark.h"],
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
copts = select({
":windows": MSVC_COPTS,
"//conditions:default": COPTS,
}),
defines = [
"BENCHMARK_STATIC_DEFINE",
"BENCHMARK_VERSION=\\\"" + (module_version() if module_version() != None else "") + "\\\"",
] + select({
":perfcounters": ["HAVE_LIBPFM"],
"//conditions:default": [],
}),
includes = ["include"],
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
strip_include_prefix = "include",
# Only static linking is allowed; no .so will be produced.
# Using `defines` (i.e. not `local_defines`) means that no
# dependent rules need to bother about defining the macro.
linkstatic = True,
local_defines = [
# Turn on Large-file Support
"_FILE_OFFSET_BITS=64",
"_LARGEFILE64_SOURCE",
"_LARGEFILE_SOURCE",
],
visibility = ["//visibility:public"],
deps = select({
":perfcounters": ["@libpfm"],
"//conditions:default": [],
}),
)
cc_library(
name = "benchmark_main",
srcs = ["src/benchmark_main.cc"],
hdrs = ["include/benchmark/benchmark.h"],
strip_include_prefix = "include",
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
includes = ["include"],
visibility = ["//visibility:public"],
deps = [":benchmark"],
)

View File

@@ -1,27 +1,34 @@
cmake_minimum_required (VERSION 2.8.12)
# Require CMake 3.13. If available, use the policies up to CMake 3.22.
cmake_minimum_required (VERSION 3.13...3.22)
project (benchmark)
foreach(p
CMP0054 # CMake 3.1
CMP0056 # export EXE_LINKER_FLAGS to try_run
CMP0057 # Support no if() IN_LIST operator
)
if(POLICY ${p})
cmake_policy(SET ${p} NEW)
endif()
endforeach()
project (benchmark VERSION 1.9.4 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
if(NOT MSVC)
option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON)
option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
# PGC++ maybe reporting false positives.
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if(BENCHMARK_FORCE_WERROR)
set(BENCHMARK_ENABLE_WERROR ON)
endif(BENCHMARK_FORCE_WERROR)
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
else()
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
endif()
option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF)
option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON)
# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
# may require downloading the source code.
@@ -30,6 +37,24 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
# This option can be used to disable building and running unit tests which depend on gtest
# in cases where it is not possible to build or find a valid version of gtest.
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON)
option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
# Export only public symbols
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
# cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
# undocumented, but working variable.
# See https://gitlab.kitware.com/cmake/cmake/-/issues/15170
set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID})
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")
set(CMAKE_CROSSCOMPILING TRUE)
endif()
endif()
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
function(should_enable_assembly_tests)
@@ -41,7 +66,7 @@ function(should_enable_assembly_tests)
return()
endif()
endif()
if (MSVC)
if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
return()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
return()
@@ -77,29 +102,63 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(GetGitVersion)
get_git_version(GIT_VERSION)
# If no git version can be determined, use the version
# from the project() command
if ("${GIT_VERSION}" STREQUAL "v0.0.0")
set(VERSION "v${benchmark_VERSION}")
else()
set(VERSION "${GIT_VERSION}")
endif()
# Normalize version: drop "v" prefix, replace first "-" with ".",
# drop everything after second "-" (including said "-").
string(STRIP ${VERSION} VERSION)
if(VERSION MATCHES v[^-]*-)
string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" NORMALIZED_VERSION ${VERSION})
else()
string(REGEX REPLACE "v(.*)" "\\1" NORMALIZED_VERSION ${VERSION})
endif()
# Tell the user what versions we are using
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION})
message(STATUS "Version: ${VERSION}")
message(STATUS "Google Benchmark version: ${VERSION}, normalized to ${NORMALIZED_VERSION}")
# The version of the libraries
set(GENERIC_LIB_VERSION ${VERSION})
string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
set(GENERIC_LIB_VERSION ${NORMALIZED_VERSION})
string(SUBSTRING ${NORMALIZED_VERSION} 0 1 GENERIC_LIB_SOVERSION)
# Import our CMake modules
include(CheckCXXCompilerFlag)
include(AddCXXCompilerFlag)
include(CheckCXXCompilerFlag)
include(CheckLibraryExists)
include(CXXFeatureCheck)
check_library_exists(rt shm_open "" HAVE_LIB_RT)
if (BENCHMARK_BUILD_32_BITS)
add_required_cxx_compiler_flag(-m32)
endif()
set(BENCHMARK_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS OFF)
if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
# MP flag only applies to cl, not cl frontends to other compilers (e.g. clang-cl, icx-cl etc)
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-WX)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-EHs-)
add_cxx_compiler_flag(-EHa-)
@@ -126,45 +185,48 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
endif()
else()
# Try and enable C++11. Don't use C++14 because it doesn't work in some
# configurations.
add_cxx_compiler_flag(-std=c++11)
if (NOT HAVE_CXX_FLAG_STD_CXX11)
add_cxx_compiler_flag(-std=c++0x)
endif()
# Turn on Large-file Support
add_definitions(-D_FILE_OFFSET_BITS=64)
add_definitions(-D_LARGEFILE64_SOURCE)
add_definitions(-D_LARGEFILE_SOURCE)
# Turn compiler warnings up to 11
if (NOT MSVC)
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
add_cxx_compiler_flag(-Werror RELEASE)
add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
add_cxx_compiler_flag(-Werror MINSIZEREL)
add_cxx_compiler_flag(-pedantic)
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
add_cxx_compiler_flag(-Wno-deprecated RELEASE)
add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
add_cxx_compiler_flag(-Wfloat-equal)
add_cxx_compiler_flag(-Wold-style-cast)
add_cxx_compiler_flag(-Wconversion)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Werror)
endif()
if (NOT BENCHMARK_ENABLE_TESTING)
# Only enable -Wsuggest-override when tests are not built, as gtest does not use 'override'.
add_cxx_compiler_flag(-Wsuggest-override)
endif()
add_cxx_compiler_flag(-pedantic)
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
add_cxx_compiler_flag(-fno-finite-math-only)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Wno-deprecated)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
endif()
@@ -173,21 +235,26 @@ else()
add_cxx_compiler_flag(-wd654)
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
endif()
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
# predefined macro, which turns on all of the wonderful libc extensions.
# However g++ doesn't do this in Cygwin so we have to define it ourselfs
# However g++ doesn't do this in Cygwin so we have to define it ourselves
# since we depend on GNU/POSIX/BSD extensions.
if (CYGWIN)
add_definitions(-D_GNU_SOURCE=1)
endif()
if (QNXNTO)
add_definitions(-D_QNX_SOURCE)
endif()
# Link time optimisation
if (BENCHMARK_ENABLE_LTO)
add_cxx_compiler_flag(-flto)
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
add_cxx_compiler_flag(-Wno-lto-type-mismatch)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
find_program(GCC_AR gcc-ar)
if (GCC_AR)
set(CMAKE_AR ${GCC_AR})
@@ -196,7 +263,7 @@ else()
if (GCC_RANLIB)
set(CMAKE_RANLIB ${GCC_RANLIB})
endif()
elseif("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
include(llvm-toolchain)
endif()
endif()
@@ -224,7 +291,8 @@ if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
add_cxx_compiler_flag(-nostdinc++)
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
@@ -250,9 +318,16 @@ if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX
AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
message(WARNING "Using std::regex with exceptions disabled is not fully supported")
endif()
cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
cxx_feature_check(PTHREAD_AFFINITY)
if (BENCHMARK_ENABLE_LIBPFM)
find_package(PFM REQUIRED)
endif()
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
@@ -262,8 +337,18 @@ add_subdirectory(src)
if (BENCHMARK_ENABLE_TESTING)
enable_testing()
if (BENCHMARK_ENABLE_GTEST_TESTS)
include(HandleGTest)
if (BENCHMARK_ENABLE_GTEST_TESTS AND
NOT (TARGET gtest AND TARGET gtest_main AND
TARGET gmock AND TARGET gmock_main))
if (BENCHMARK_USE_BUNDLED_GTEST)
include(GoogleTest)
else()
find_package(GTest CONFIG REQUIRED)
add_library(gtest ALIAS GTest::gtest)
add_library(gtest_main ALIAS GTest::gtest_main)
add_library(gmock ALIAS GTest::gmock)
add_library(gmock_main ALIAS GTest::gmock_main)
endif()
endif()
add_subdirectory(test)
endif()

View File

@@ -22,44 +22,75 @@
#
# Please keep the list sorted.
Abhina Sreeskantharajan <abhina.sreeskantharajan@ibm.com>
Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steelal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Bátor Tallér <bator.taller@shapr3d.com>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Cezary Skrzyński <czars1988@gmail.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Cyrille Faucheux <cyrille.faucheux@gmail.com>
Daniel Harvey <danielharvey458@gmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Doug Evans <xdje42@gmail.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Fanbo Meng <fanbo.meng@ibm.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com>
Gergely Meszaros <maetveis@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Hannes Hauswedell <h2@fsfe.org>
Henrique Bucher <hbucher@gmail.com>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Iakov Sergeev <yahontu@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
John Millikin <jmillikin@stripe.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Kaito Udagawa <umireon@gmail.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Pascal Leroy <phl@google.com>
Paul Redmond <paul.redmond@gmail.com>
Pierre Phaneuf <pphaneuf@google.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Raul Marin <rmrodriguez@cartodb.com>
Ray Glover <ray.glover@uk.ibm.com>
Robert Guo <robert.guo@mongodb.com>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
Steven Wan <wan.yu@ibm.com>
Tobias Schmidt <tobias.schmidt@in.tum.de>
Tobias Ulvgård <tobias.ulvgard@dirac.se>
Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
Yixuan Qiu <yixuanq@gmail.com>

41
third_party/benchmark/MODULE.bazel vendored Normal file
View File

@@ -0,0 +1,41 @@
module(
name = "google_benchmark",
version = "1.9.4",
)
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "platforms", version = "0.0.10")
bazel_dep(name = "rules_cc", version = "0.0.9")
bazel_dep(name = "rules_python", version = "1.0.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.14.0", dev_dependency = True, repo_name = "com_google_googletest")
bazel_dep(name = "libpfm", version = "4.11.0.bcr.1")
# Register a toolchain for Python 3.9 to be able to build numpy. Python
# versions >=3.10 are problematic.
# A second reason for this is to be able to build Python hermetically instead
# of relying on the changing default version from rules_python.
python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
python.toolchain(python_version = "3.8")
python.toolchain(python_version = "3.9")
python.toolchain(python_version = "3.10")
python.toolchain(python_version = "3.11")
python.toolchain(
is_default = True,
python_version = "3.12",
)
python.toolchain(python_version = "3.13")
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
pip.parse(
hub_name = "tools_pip_deps",
python_version = "3.9",
requirements_lock = "//tools:requirements.txt",
)
use_repo(pip, "tools_pip_deps")
# -- bazel_dep definitions -- #
bazel_dep(name = "nanobind_bazel", version = "2.7.0", dev_dependency = True)

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,20 @@
workspace(name = "com_github_google_benchmark")
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],
strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
benchmark_deps()
load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()
load("@rules_python//python:pip.bzl", "pip_parse")
pip_parse(
name = "tools_pip_deps",
requirements_lock = "//tools:requirements.txt",
)
load("@tools_pip_deps//:requirements.bzl", "install_deps")
install_deps()

View File

@@ -0,0 +1,2 @@
# This file marks the root of the Bazel workspace.
# See MODULE.bazel for dependencies and setup.

2
third_party/benchmark/_config.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
theme: jekyll-theme-midnight
markdown: GFM

View File

@@ -41,7 +41,7 @@ build_script:
- cmake --build . --config %configuration%
test_script:
- ctest -c %configuration% --timeout 300 --output-on-failure
- ctest --build-config %configuration% --timeout 300 --output-on-failure
artifacts:
- path: '_build/CMakeFiles/*.log'

View File

@@ -0,0 +1,54 @@
"""
This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings).
"""
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
def benchmark_deps():
"""Loads dependencies required to build Google Benchmark."""
if "bazel_skylib" not in native.existing_rules():
http_archive(
name = "bazel_skylib",
sha256 = "cd55a062e763b9349921f0f5db8c3933288dc8ba4f76dd9416aac68acee3cb94",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
],
)
if "rules_python" not in native.existing_rules():
http_archive(
name = "rules_python",
sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
strip_prefix = "rules_python-0.27.1",
url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
)
if "com_google_googletest" not in native.existing_rules():
new_git_repository(
name = "com_google_googletest",
remote = "https://github.com/google/googletest.git",
tag = "release-1.12.1",
)
if "nanobind" not in native.existing_rules():
new_git_repository(
name = "nanobind",
remote = "https://github.com/wjakob/nanobind.git",
tag = "v1.9.2",
build_file = "@//bindings/python:nanobind.BUILD",
recursive_init_submodules = True,
)
if "libpfm" not in native.existing_rules():
# Downloaded from v4.11.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
http_archive(
name = "libpfm",
build_file = str(Label("//tools:libpfm.BUILD.bazel")),
sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
type = "tar.gz",
strip_prefix = "libpfm-4.11.0",
urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
)

View File

@@ -0,0 +1,34 @@
load("@nanobind_bazel//:build_defs.bzl", "nanobind_extension", "nanobind_stubgen")
load("@rules_python//python:defs.bzl", "py_library", "py_test")
py_library(
name = "google_benchmark",
srcs = ["__init__.py"],
visibility = ["//visibility:public"],
deps = [
":_benchmark",
],
)
nanobind_extension(
name = "_benchmark",
srcs = ["benchmark.cc"],
deps = ["//:benchmark"],
)
nanobind_stubgen(
name = "benchmark_stubgen",
marker_file = "bindings/python/google_benchmark/py.typed",
module = ":_benchmark",
)
py_test(
name = "example",
srcs = ["example.py"],
python_version = "PY3",
srcs_version = "PY3",
visibility = ["//visibility:public"],
deps = [
":google_benchmark",
],
)

View File

@@ -0,0 +1,145 @@
# Copyright 2020 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python benchmarking utilities.
Example usage:
import google_benchmark as benchmark
@benchmark.register
def my_benchmark(state):
... # Code executed outside `while` loop is not timed.
while state:
... # Code executed within `while` loop is timed.
if __name__ == '__main__':
benchmark.main()
"""
import atexit
from absl import app
from google_benchmark import _benchmark
from google_benchmark._benchmark import (
Counter as Counter,
State as State,
kMicrosecond as kMicrosecond,
kMillisecond as kMillisecond,
kNanosecond as kNanosecond,
kSecond as kSecond,
o1 as o1,
oAuto as oAuto,
oLambda as oLambda,
oLogN as oLogN,
oN as oN,
oNCubed as oNCubed,
oNLogN as oNLogN,
oNone as oNone,
oNSquared as oNSquared,
)
__version__ = "1.9.4"
class __OptionMaker:
    """Collects benchmark options declared through decorators.

    Every non-dunder attribute access, e.g. ``option.range``, yields a
    function that records the call, so ``@option.range(start=0, limit=1<<5)``
    queues a ``("range", args, kwargs)`` entry instead of applying it
    immediately. The class itself keeps no state; everything recorded lives
    in the ``Options`` object that travels through the decorator chain.
    """

    class Options:
        """Plain record pairing the benchmarked function with the option
        calls queued for it."""

        def __init__(self, func):
            self.func = func
            # List of (builder_name, args, kwargs) tuples, in the order the
            # option decorators were applied.
            self.builder_calls = []

    @classmethod
    def make(cls, func_or_options):
        """Return ``func_or_options`` unchanged if it is already an Options,
        otherwise wrap the benchmarked function in a new Options."""
        if isinstance(func_or_options, cls.Options):
            return func_or_options
        return cls.Options(func_or_options)

    def __getattr__(self, builder_name):
        """Return a function that queues a ``builder_name`` option call.

        Raises:
            AttributeError: if ``builder_name`` is a dunder name.
        """
        # __getattr__ is consulted for every attribute normal lookup cannot
        # find, including protocol probes such as __deepcopy__ or
        # __wrapped__. Answering those with a builder closure makes hasattr()
        # report True and lets copy.deepcopy() call the closure as if it were
        # a copy hook, so dunder names are reported as missing instead.
        if builder_name.startswith("__") and builder_name.endswith("__"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute "
                f"{builder_name!r}"
            )

        # The function that gets returned on
        # @option.range(start=0, limit=1<<5).
        def builder_method(*args, **kwargs):
            # The decorator that gets called, either with the benchmarked
            # function or with the Options produced by a previous option.
            def decorator(func_or_options):
                options = self.make(func_or_options)
                options.builder_calls.append((builder_name, args, kwargs))
                # Options is returned rather than the function, so this is
                # not technically a decorator: the chain needs a final
                # @register to produce a benchmark.
                return options

            return decorator

        return builder_method


# Alias for nicer API.
# We have to instantiate an object, even if stateless, to be able to use
# __getattr__ on option.range
option = __OptionMaker()
def register(undefined=None, *, name=None):
    """Register a function for benchmarking.

    Args:
        undefined: the function to benchmark, or the Options object produced
            by a chain of ``@option.<builder>(...)`` decorators. When it is
            None, a decorator is returned that performs the registration
            once it receives the target.
        name: benchmark name; defaults to the ``__name__`` of the function.

    Returns:
        The benchmarked function itself, unmodified, or a decorator when
        ``undefined`` is None.
    """
    if undefined is None:
        # Called with parentheses, e.g. @register(name="..."): there is no
        # target yet, so hand back a decorator that will receive it.
        def _deferred(target):
            return register(target, name=name)

        return _deferred

    # `undefined` is either the function to benchmark (simple case) or an
    # Options instance (@option._ case); normalise both to Options.
    options = __OptionMaker.make(undefined)
    benchmark_name = options.func.__name__ if name is None else name

    # Create the benchmark, then replay every queued @option._ call on it.
    # The queue is walked from last to first: decorators are applied
    # bottom-up, so the reversed order matches the order they are written.
    benchmark = _benchmark.RegisterBenchmark(benchmark_name, options.func)
    for builder_name, args, kwargs in reversed(options.builder_calls):
        getattr(benchmark, builder_name)(*args, **kwargs)

    # The function is handed back unchanged because registering does not
    # modify it.
    return options.func
def _flags_parser(argv):
    """Pass ``argv`` through ``_benchmark.Initialize`` and give whatever it
    returns to ``app.parse_flags_with_usage``."""
    remaining = _benchmark.Initialize(argv)
    return app.parse_flags_with_usage(remaining)
def _run_benchmarks(argv):
    """Run the registered benchmarks.

    Raises:
        app.UsageError: if ``argv`` holds more than one element.
    """
    extra_arguments = len(argv) - 1
    if extra_arguments > 0:
        raise app.UsageError("Too many command-line arguments.")
    return _benchmark.RunSpecifiedBenchmarks()
def main(argv=None):
    """Entry point: hand ``_run_benchmarks`` to ``app.run``, with
    ``_flags_parser`` installed as the flags parser."""
    return app.run(
        _run_benchmarks,
        argv=argv,
        flags_parser=_flags_parser,
    )
# FIXME: can we rerun with disabled ASLR?
# Methods for use with custom main function.
initialize = _benchmark.Initialize
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
atexit.register(_benchmark.ClearRegisteredBenchmarks)

View File

@@ -0,0 +1,184 @@
// Benchmark for Python.
#include <stdexcept>

#include "benchmark/benchmark.h"
#include "nanobind/nanobind.h"
#include "nanobind/operators.h"
#include "nanobind/stl/bind_map.h"
#include "nanobind/stl/string.h"
#include "nanobind/stl/vector.h"
NB_MAKE_OPAQUE(benchmark::UserCounters);
namespace {
namespace nb = nanobind;
// Hands the command line received from Python to benchmark::Initialize and
// returns the first `argc` arguments as they stand after that call.
//
// `argv` must contain at least one element (the program name); an empty
// vector raises std::invalid_argument, because the code below reads argv[0]
// and writes ptrs[0] unconditionally and would otherwise index out of
// bounds.
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
  if (argv.empty()) {
    throw std::invalid_argument(
        "Initialize() requires argv to contain at least the program name");
  }

  // The `argv` pointers here become invalid when this function returns, but
  // benchmark holds the pointer to `argv[0]`. We create a static copy of it
  // so it persists, and replace the pointer below.
  static std::string executable_name(argv[0]);

  std::vector<char*> ptrs;
  ptrs.reserve(argv.size());
  for (const auto& arg : argv) {
    // benchmark::Initialize takes a char** argument vector, hence the
    // const_cast on the borrowed string storage.
    ptrs.push_back(const_cast<char*>(arg.c_str()));
  }
  ptrs[0] = const_cast<char*>(executable_name.c_str());

  // argc is passed by pointer, so it is re-read after the call rather than
  // assumed to still equal argv.size().
  int argc = static_cast<int>(argv.size());
  benchmark::Initialize(&argc, ptrs.data());

  // Copy the first `argc` entries into owned strings before `argv`'s
  // storage goes away.
  return std::vector<std::string>(ptrs.begin(), ptrs.begin() + argc);
}
// Registers a benchmark called `name` whose body invokes the Python callable
// `f`, passing it a pointer to the benchmark::State of the current run.
// `f` is captured by value in the lambda handed to
// benchmark::RegisterBenchmark. Returns whatever pointer
// benchmark::RegisterBenchmark returns.
benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
nb::callable f) {
return benchmark::RegisterBenchmark(
name, [f](benchmark::State& state) { f(&state); });
}
// Definition of the `_benchmark` extension module. It exposes, in order:
// the TimeUnit and BigO enums, the Benchmark builder class, Counter (with
// its nested Flags and OneK enums), the UserCounters map, State, and the
// module-level functions Initialize, RegisterBenchmark,
// RunSpecifiedBenchmarks and ClearRegisteredBenchmarks.
NB_MODULE(_benchmark, m) {
// Enum values are also exported into the enclosing scope (export_values).
using benchmark::TimeUnit;
nb::enum_<TimeUnit>(m, "TimeUnit")
.value("kNanosecond", TimeUnit::kNanosecond)
.value("kMicrosecond", TimeUnit::kMicrosecond)
.value("kMillisecond", TimeUnit::kMillisecond)
.value("kSecond", TimeUnit::kSecond)
.export_values();
using benchmark::BigO;
nb::enum_<BigO>(m, "BigO")
.value("oNone", BigO::oNone)
.value("o1", BigO::o1)
.value("oN", BigO::oN)
.value("oNSquared", BigO::oNSquared)
.value("oNCubed", BigO::oNCubed)
.value("oLogN", BigO::oLogN)
.value("oNLogN", BigO::oNLogN)
.value("oAuto", BigO::oAuto)
.value("oLambda", BigO::oLambda)
.export_values();
using benchmark::internal::Benchmark;
nb::class_<Benchmark>(m, "Benchmark")
// For methods returning a pointer to the current object, reference
// return policy is used to ask nanobind not to take ownership of the
// returned object and avoid calling delete on it.
// NOTE(review): the two links in this comment point at the pybind11
// documentation although the bindings are written with nanobind.
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
//
// For methods taking a const std::vector<...>&, a copy is created
// because it is bound to a Python list.
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
.def("unit", &Benchmark::Unit, nb::rv_policy::reference)
.def("arg", &Benchmark::Arg, nb::rv_policy::reference)
.def("args", &Benchmark::Args, nb::rv_policy::reference)
.def("range", &Benchmark::Range, nb::rv_policy::reference,
nb::arg("start"), nb::arg("limit"))
.def("dense_range", &Benchmark::DenseRange,
nb::rv_policy::reference, nb::arg("start"),
nb::arg("limit"), nb::arg("step") = 1)
.def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
.def("args_product", &Benchmark::ArgsProduct,
nb::rv_policy::reference)
.def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
.def("arg_names", &Benchmark::ArgNames,
nb::rv_policy::reference)
.def("range_pair", &Benchmark::RangePair,
nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
nb::arg("lo2"), nb::arg("hi2"))
.def("range_multiplier", &Benchmark::RangeMultiplier,
nb::rv_policy::reference)
.def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
.def("min_warmup_time", &Benchmark::MinWarmUpTime,
nb::rv_policy::reference)
.def("iterations", &Benchmark::Iterations,
nb::rv_policy::reference)
.def("repetitions", &Benchmark::Repetitions,
nb::rv_policy::reference)
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
nb::rv_policy::reference)
.def("use_real_time", &Benchmark::UseRealTime,
nb::rv_policy::reference)
.def("use_manual_time", &Benchmark::UseManualTime,
nb::rv_policy::reference)
// The cast selects the Complexity overload that takes a BigO value.
.def(
"complexity",
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
nb::rv_policy::reference,
nb::arg("complexity") = benchmark::oAuto);
using benchmark::Counter;
// The class object is kept in a variable so the nested enums below can be
// attached to it before its own members are defined.
nb::class_<Counter> py_counter(m, "Counter");
nb::enum_<Counter::Flags>(py_counter, "Flags", nb::is_arithmetic(), nb::is_flag())
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
.value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate)
.value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant)
.value("kIsIterationInvariantRate",
Counter::Flags::kIsIterationInvariantRate)
.value("kAvgIterations", Counter::Flags::kAvgIterations)
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values();
nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
.value("kIs1024", Counter::OneK::kIs1024)
.export_values();
py_counter
.def(nb::init<double, Counter::Flags, Counter::OneK>(),
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
nb::arg("k") = Counter::kIs1000)
.def("__init__",
([](Counter* c, double value) { new (c) Counter(value); }))
.def_rw("value", &Counter::value)
.def_rw("flags", &Counter::flags)
.def_rw("oneK", &Counter::oneK)
.def(nb::init_implicit<double>());
// Together with init_implicit<double> above, this lets plain Python numbers
// be used where a Counter is expected.
nb::implicitly_convertible<nb::int_, Counter>();
nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
using benchmark::State;
nb::class_<State>(m, "State")
// __bool__ is bound to KeepRunning, which is what lets Python code drive
// the benchmark loop with `while state:`.
.def("__bool__", &State::KeepRunning)
.def_prop_ro("keep_running", &State::KeepRunning)
.def("pause_timing", &State::PauseTiming)
.def("resume_timing", &State::ResumeTiming)
.def("skip_with_error", &State::SkipWithError)
.def_prop_ro("error_occurred", &State::error_occurred)
.def("set_iteration_time", &State::SetIterationTime)
.def_prop_rw("bytes_processed", &State::bytes_processed,
&State::SetBytesProcessed)
.def_prop_rw("complexity_n", &State::complexity_length_n,
&State::SetComplexityN)
.def_prop_rw("items_processed", &State::items_processed,
&State::SetItemsProcessed)
.def("set_label", &State::SetLabel)
.def("range", &State::range, nb::arg("pos") = 0)
.def_prop_ro("iterations", &State::iterations)
.def_prop_ro("name", &State::name)
.def_rw("counters", &State::counters)
.def_prop_ro("thread_index", &State::thread_index)
.def_prop_ro("threads", &State::threads);
// Module-level functions. Initialize and RegisterBenchmark are the wrappers
// defined earlier in this file.
m.def("Initialize", Initialize);
m.def("RegisterBenchmark", RegisterBenchmark,
nb::rv_policy::reference);
// The lambda calls benchmark::RunSpecifiedBenchmarks() and returns nothing
// to Python.
m.def("RunSpecifiedBenchmarks",
[]() { benchmark::RunSpecifiedBenchmarks(); });
m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
};
} // namespace

View File

@@ -0,0 +1,140 @@
# Copyright 2020 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of Python using C++ benchmark framework.
To run this example, you must first install the `google_benchmark` Python
package.
To install using `setup.py`, download and extract the `google_benchmark` source.
In the extracted directory, execute:
python setup.py install
"""
import random
import time
import google_benchmark as benchmark
from google_benchmark import Counter
@benchmark.register
def empty(state):
while state:
pass
@benchmark.register
def sum_million(state):
while state:
sum(range(1_000_000))
@benchmark.register
def pause_timing(state):
"""Pause timing every iteration."""
while state:
# Construct a list of random ints every iteration without timing it
state.pause_timing()
random_list = [random.randint(0, 100) for _ in range(100)]
state.resume_timing()
# Time the in place sorting algorithm
random_list.sort()
@benchmark.register
def skipped(state):
if True: # Test some predicate here.
state.skip_with_error("some error")
return # NOTE: You must explicitly return, or benchmark will continue.
# Benchmark code would be here.
@benchmark.register
@benchmark.option.use_manual_time()
def manual_timing(state):
while state:
# Manually count Python CPU time
start = time.perf_counter() # perf_counter_ns() in Python 3.7+
# Something to benchmark
time.sleep(0.01)
end = time.perf_counter()
state.set_iteration_time(end - start)
@benchmark.register
def custom_counters(state):
"""Collect custom metric using benchmark.Counter."""
num_foo = 0.0
while state:
# Benchmark some code here
# Collect some custom metric named foo
num_foo += 0.13
# Automatic Counter from numbers.
state.counters["foo"] = num_foo
# Set a counter as a rate.
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
# Set a counter as an inverse of rate.
state.counters["foo_inv_rate"] = Counter(
num_foo, Counter.kIsRate | Counter.kInvert
)
# Set a counter as a thread-average quantity.
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
# There's also a combined flag:
state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate)
@benchmark.register
@benchmark.option.measure_process_cpu_time()
@benchmark.option.use_real_time()
def with_options(state):
while state:
sum(range(1_000_000))
@benchmark.register(name="sum_million_microseconds")
@benchmark.option.unit(benchmark.kMicrosecond)
def with_options2(state):
while state:
sum(range(1_000_000))
@benchmark.register
@benchmark.option.arg(100)
@benchmark.option.arg(1000)
def passing_argument(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range(8, limit=8 << 10)
def using_range(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range_multiplier(2)
@benchmark.option.range(1 << 10, 1 << 18)
@benchmark.option.complexity(benchmark.oN)
def computing_complexity(state):
while state:
sum(range(state.range(0)))
state.complexity_n = state.range(0)
if __name__ == "__main__":
benchmark.main()

View File

@@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
endif()
@@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)

View File

@@ -17,6 +17,8 @@ if(__cxx_feature_check)
endif()
set(__cxx_feature_check INCLUDED)
option(CXXFEATURECHECK_DEBUG OFF)
function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
@@ -27,26 +29,38 @@ function(cxx_feature_check FILE)
return()
endif()
set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
if (ARGC GREATER 1)
message(STATUS "Enabling additional flags: ${ARGV1}")
list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
endif()
if (NOT DEFINED COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE}")
if(CMAKE_CROSSCOMPILING)
message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
if(COMPILE_${FEATURE})
message(WARNING
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
set(RUN_${FEATURE} 0)
set(RUN_${FEATURE} 0 CACHE INTERNAL "")
else()
set(RUN_${FEATURE} 1)
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
endif()
else()
message(STATUS "Performing Test ${FEATURE}")
message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
endif()
endif()
@@ -56,7 +70,11 @@ function(cxx_feature_check FILE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
if(CXXFEATURECHECK_DEBUG)
message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
else()
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
endif()
else()
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
endif()

View File

@@ -1 +1,12 @@
@PACKAGE_INIT@
include (CMakeFindDependencyMacro)
find_dependency (Threads)
if (@BENCHMARK_ENABLE_LIBPFM@)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
find_dependency (PFM)
endif()
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")

View File

@@ -20,35 +20,17 @@ set(__get_git_version INCLUDED)
function(get_git_version var)
if(GIT_EXECUTABLE)
execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 --dirty
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
RESULT_VARIABLE status
OUTPUT_VARIABLE GIT_VERSION
ERROR_QUIET)
if(${status})
if(status)
set(GIT_VERSION "v0.0.0")
else()
string(STRIP ${GIT_VERSION} GIT_VERSION)
string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION})
endif()
# Work out if the repository is dirty
execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_QUIET
ERROR_QUIET)
execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD --
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_DIFF_INDEX
ERROR_QUIET)
string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
if (${GIT_DIRTY})
set(GIT_VERSION "${GIT_VERSION}-dirty")
endif()
else()
set(GIT_VERSION "v0.0.0")
endif()
message(STATUS "git Version: ${GIT_VERSION}")
set(${var} ${GIT_VERSION} PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,58 @@
# Download and unpack googletest at configure time
set(GOOGLETEST_PREFIX "${benchmark_BINARY_DIR}/third_party/googletest")
configure_file(${benchmark_SOURCE_DIR}/cmake/GoogleTest.cmake.in ${GOOGLETEST_PREFIX}/CMakeLists.txt @ONLY)
set(GOOGLETEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/googletest" CACHE PATH "") # Mind the quotes
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
-DALLOW_DOWNLOADING_GOOGLETEST=${BENCHMARK_DOWNLOAD_DEPENDENCIES} -DGOOGLETEST_PATH:PATH=${GOOGLETEST_PATH} .
RESULT_VARIABLE result
WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(
COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()
# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
include(${GOOGLETEST_PREFIX}/googletest-paths.cmake)
# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${GOOGLETEST_SOURCE_DIR}
${GOOGLETEST_BINARY_DIR}
EXCLUDE_FROM_ALL)
# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
if (MSVC)
target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
else()
target_compile_options(gtest PRIVATE "-w")
target_compile_options(gtest_main PRIVATE "-w")
target_compile_options(gmock PRIVATE "-w")
target_compile_options(gmock_main PRIVATE "-w")
endif()
if(NOT DEFINED GTEST_COMPILE_COMMANDS)
set(GTEST_COMPILE_COMMANDS ON)
endif()
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})

View File

@@ -0,0 +1,60 @@
cmake_minimum_required (VERSION 3.13...3.22)
project(googletest-download NONE)
# Enable ExternalProject CMake module
include(ExternalProject)
option(ALLOW_DOWNLOADING_GOOGLETEST "If googletest src tree is not found in location specified by GOOGLETEST_PATH, do fetch the archive from internet" OFF)
set(GOOGLETEST_PATH "/usr/src/googletest" CACHE PATH
"Path to the googletest root tree. Should contain googletest and googlemock subdirs. And CMakeLists.txt in root, and in both of these subdirs")
# Download and install GoogleTest
message(STATUS "Looking for Google Test sources")
message(STATUS "Looking for Google Test sources in ${GOOGLETEST_PATH}")
if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" AND EXISTS "${GOOGLETEST_PATH}/CMakeLists.txt" AND
EXISTS "${GOOGLETEST_PATH}/googletest" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googletest" AND EXISTS "${GOOGLETEST_PATH}/googletest/CMakeLists.txt" AND
EXISTS "${GOOGLETEST_PATH}/googlemock" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googlemock" AND EXISTS "${GOOGLETEST_PATH}/googlemock/CMakeLists.txt")
message(STATUS "Found Google Test in ${GOOGLETEST_PATH}")
ExternalProject_Add(
googletest
PREFIX "${CMAKE_BINARY_DIR}"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
SOURCE_DIR "${GOOGLETEST_PATH}" # use existing src dir.
BINARY_DIR "${CMAKE_BINARY_DIR}/build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
else()
if(NOT ALLOW_DOWNLOADING_GOOGLETEST)
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
return()
else()
message(STATUS "Did not find Google Test sources! Fetching from web...")
ExternalProject_Add(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG "v1.15.2"
GIT_SHALLOW "ON"
PREFIX "${CMAKE_BINARY_DIR}"
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
SOURCE_DIR "${CMAKE_BINARY_DIR}/src"
BINARY_DIR "${CMAKE_BINARY_DIR}/build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
endif()
endif()
ExternalProject_Get_Property(googletest SOURCE_DIR BINARY_DIR)
file(WRITE googletest-paths.cmake
"set(GOOGLETEST_SOURCE_DIR \"${SOURCE_DIR}\")
set(GOOGLETEST_BINARY_DIR \"${BINARY_DIR}\")
")

View File

@@ -1,113 +0,0 @@
include(split_list)
# build_external_gtest()
# Fetches and builds GoogleTest/GoogleMock from GitHub as an ExternalProject
# and exposes gtest, gmock and gmock_main as imported library targets.
# Sets GTEST_INCLUDE_DIRS and appends the three targets to GTEST_BOTH_LIBRARIES.
macro(build_external_gtest)
include(ExternalProject)
# Collect the C++ flags that are forwarded to the GoogleTest build.
set(GTEST_FLAGS "")
if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
list(APPEND GTEST_FLAGS -stdlib=libc++)
else()
message(WARNING "Unsupported compiler (${CMAKE_CXX_COMPILER}) when using libc++")
endif()
endif()
if (BENCHMARK_BUILD_32_BITS)
list(APPEND GTEST_FLAGS -m32)
endif()
if (NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
list(APPEND GTEST_FLAGS ${CMAKE_CXX_FLAGS})
endif()
# GoogleTest is built with the upper-cased CMAKE_BUILD_TYPE; a COVERAGE build
# is mapped to DEBUG.
string(TOUPPER "${CMAKE_BUILD_TYPE}" GTEST_BUILD_TYPE)
if ("${GTEST_BUILD_TYPE}" STREQUAL "COVERAGE")
set(GTEST_BUILD_TYPE "DEBUG")
endif()
# FIXME: Since 10/Feb/2017 the googletest trunk has had a bug where
# -Werror=unused-function fires during the build on OS X. This is a temporary
# workaround to keep our travis bots from failing. It should be removed
# once gtest is fixed.
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
list(APPEND GTEST_FLAGS "-Wno-unused-function")
endif()
split_list(GTEST_FLAGS)
# EXCLUDE_FROM_ALL is only passed to ExternalProject_Add on CMake versions
# newer than 3.0.99; on older versions both variables stay empty.
set(EXCLUDE_FROM_ALL_OPT "")
set(EXCLUDE_FROM_ALL_VALUE "")
if (${CMAKE_VERSION} VERSION_GREATER "3.0.99")
set(EXCLUDE_FROM_ALL_OPT "EXCLUDE_FROM_ALL")
set(EXCLUDE_FROM_ALL_VALUE "ON")
endif()
# NOTE(review): GIT_TAG tracks the moving "master" branch, so the fetched
# revision is not pinned.
ExternalProject_Add(googletest
${EXCLUDE_FROM_ALL_OPT} ${EXCLUDE_FROM_ALL_VALUE}
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG master
PREFIX "${CMAKE_BINARY_DIR}/googletest"
INSTALL_DIR "${CMAKE_BINARY_DIR}/googletest"
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=${GTEST_BUILD_TYPE}
-DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
-DCMAKE_INSTALL_LIBDIR:PATH=<INSTALL_DIR>/lib
-DCMAKE_CXX_FLAGS:STRING=${GTEST_FLAGS}
-Dgtest_force_shared_crt:BOOL=ON
)
ExternalProject_Get_Property(googletest install_dir)
set(GTEST_INCLUDE_DIRS ${install_dir}/include)
# Create the include directory now; presumably so the imported targets' include
# path already exists at configure time, before the external build has run.
file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIRS})
set(LIB_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(LIB_PREFIX "${CMAKE_STATIC_LIBRARY_PREFIX}")
# For DEBUG builds the library file names carry a "d" before the suffix.
if("${GTEST_BUILD_TYPE}" STREQUAL "DEBUG")
set(LIB_SUFFIX "d${CMAKE_STATIC_LIBRARY_SUFFIX}")
endif()
# Use gmock_main instead of gtest_main because it initializes gtest as well.
# Note: The libraries are listed in reverse order of their dependencies.
# Each target's INTERFACE_LINK_LIBRARIES is the list accumulated so far, i.e.
# the libraries declared before it in this loop.
foreach(LIB gtest gmock gmock_main)
add_library(${LIB} UNKNOWN IMPORTED)
set_target_properties(${LIB} PROPERTIES
IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}${LIB}${LIB_SUFFIX}
INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIRS}
INTERFACE_LINK_LIBRARIES "${GTEST_BOTH_LIBRARIES}"
)
add_dependencies(${LIB} googletest)
list(APPEND GTEST_BOTH_LIBRARIES ${LIB})
endforeach()
endmacro(build_external_gtest)
# Select where GoogleTest comes from when the gtest-based tests are enabled:
#   1. a googletest/ directory inside the current source directory,
#   2. an external download (BENCHMARK_DOWNLOAD_DEPENDENCIES),
#   3. an already installed GTest package.
if (BENCHMARK_ENABLE_GTEST_TESTS)
if (IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/googletest)
# In-tree copy: build it as a subdirectory with its install rules disabled.
set(GTEST_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/googletest")
set(INSTALL_GTEST OFF CACHE INTERNAL "")
set(INSTALL_GMOCK OFF CACHE INTERNAL "")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/googletest)
set(GTEST_BOTH_LIBRARIES gtest gmock gmock_main)
# Locate gtest/gtest.h and gmock/gmock.h and collect their include paths.
foreach(HEADER test mock)
# CMake 2.8 and older don't respect INTERFACE_INCLUDE_DIRECTORIES, so we
# have to add the paths ourselves.
set(HFILE g${HEADER}/g${HEADER}.h)
set(HPATH ${GTEST_ROOT}/google${HEADER}/include)
find_path(HEADER_PATH_${HEADER} ${HFILE}
NO_DEFAULT_PATHS
HINTS ${HPATH}
)
if (NOT HEADER_PATH_${HEADER})
message(FATAL_ERROR "Failed to find header ${HFILE} in ${HPATH}")
endif()
list(APPEND GTEST_INCLUDE_DIRS ${HEADER_PATH_${HEADER}})
endforeach()
elseif(BENCHMARK_DOWNLOAD_DEPENDENCIES)
# No in-tree copy: fetch and build GoogleTest as an external project.
build_external_gtest()
else()
# Fall back to an installed GTest; the gmock header must also be found.
find_package(GTest REQUIRED)
find_path(GMOCK_INCLUDE_DIRS gmock/gmock.h
HINTS ${GTEST_INCLUDE_DIRS})
if (NOT GMOCK_INCLUDE_DIRS)
message(FATAL_ERROR "Failed to find header gmock/gmock.h with hint ${GTEST_INCLUDE_DIRS}")
endif()
set(GTEST_INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} ${GMOCK_INCLUDE_DIRS})
# FIXME: We don't currently require the gmock library to build the tests,
# and it's likely we won't find it, so we don't try. As long as we've
# found the gmock/gmock.h header and gtest_main that should be good enough.
endif()
endif()

View File

@@ -1,11 +1,12 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${prefix}/lib
includedir=${prefix}/include
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework
Version: @VERSION@
Version: @NORMALIZED_VERSION@
Libs: -L${libdir} -lbenchmark
Libs.private: -lpthread @BENCHMARK_PRIVATE_LINK_LIBRARIES@
Cflags: -I${includedir}

View File

@@ -0,0 +1,7 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework (with main() function)
Version: @NORMALIZED_VERSION@
Requires: benchmark
Libs: -L${libdir} -lbenchmark_main

View File

@@ -0,0 +1,16 @@
#include <pthread.h>
// Probe program for the pthread CPU-affinity API: it uses cpu_set_t with the
// CPU_ZERO / CPU_SET / CPU_CLR macros and calls pthread_getaffinity_np and
// pthread_setaffinity_np on the calling thread.
// NOTE(review): presumably compiled by the build system as a feature check --
// confirm against the CMake code that references this file.
// Returns 0 on success, otherwise the error code of the first failing call.
int main() {
cpu_set_t set;
CPU_ZERO(&set);
// Set and immediately clear every bit so both macros are exercised; the set
// is empty again after the loop.
for (int i = 0; i < CPU_SETSIZE; ++i) {
CPU_SET(i, &set);
CPU_CLR(i, &set);
}
pthread_t self = pthread_self();
int ret;
// Read the current affinity mask into `set` ...
ret = pthread_getaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
// ... and write the same mask straight back.
ret = pthread_setaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
return 0;
}

View File

@@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this
is matching stack frame addresses. In this case regular expressions
can be used to match the differing bits of output. For example:
<!-- {% raw %} -->
```c++
int ExternInt;
struct Point { int x, y, z; };
@@ -127,6 +128,7 @@ extern "C" void test_store_point() {
// CHECK: ret
}
```
<!-- {% endraw %} -->
## Current Requirements and Limitations

View File

@@ -0,0 +1,3 @@
theme: jekyll-theme-minimal
logo: /assets/images/icon_black.png
show_downloads: true

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

View File

@@ -0,0 +1,19 @@
# Build tool dependency policy
We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
## CMake
The current supported version is CMake 3.10 as of 2023-08-10. Most modern
distributions include newer versions, for example:
* Ubuntu 20.04 provides CMake 3.16.3
* Debian 11.4 provides CMake 3.18.4
* Ubuntu 22.04 provides CMake 3.22.1
## Python
The Python bindings require Python 3.10+ as of v1.9.0 (2024-08-16) for installation from PyPI.
Building from source for older versions probably still works, though. See the [user guide](python_bindings.md) for details on how to build from source.
The minimum theoretically supported version is Python 3.8, since the used bindings generator (nanobind) only supports Python 3.8+.

12
third_party/benchmark/docs/index.md vendored Normal file
View File

@@ -0,0 +1,12 @@
# Benchmark
* [Assembly Tests](AssemblyTests.md)
* [Dependencies](dependencies.md)
* [Perf Counters](perf_counters.md)
* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
* [Python Bindings](python_bindings.md)
* [Random Interleaving](random_interleaving.md)
* [Reducing Variance](reducing_variance.md)
* [Releasing](releasing.md)
* [Tools](tools.md)
* [User Guide](user_guide.md)

View File

@@ -0,0 +1,35 @@
<a name="perf-counters" />
# User-Requested Performance Counters
When running benchmarks, the user may choose to request collection of
performance counters. This may be useful in investigation scenarios - narrowing
down the cause of a regression; or verifying that the underlying cause of a
performance improvement matches expectations.
This feature is available if:
* The benchmark is run on an architecture featuring a Performance Monitoring
Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
dependency via Bazel.
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
To opt-in:
* If using a Bazel build, add `--define pfm=1` to your build flags
* If using CMake:
* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
* Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
mapped by libpfm to platform-specifics - see libpfm
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
by User Counters.

View File

@@ -0,0 +1,52 @@
# Platform Specific Build Instructions
## Building with GCC
When the library is built using GCC it is necessary to link with the pthread
library due to how GCC implements `std::thread`. Failing to link to pthread will
lead to runtime exceptions (unless you're using libc++), not linker errors. See
[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
can link to pthread by adding `-pthread` to your linker command. Note, you can
also use `-lpthread`, but there are potential issues with ordering of command
line parameters if you use that.
On QNX, the pthread library is part of libc and usually included automatically
(see
[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)).
There's no separate pthread library to link.
## Building with Visual Studio 2015, 2017 or 2022
The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
```
// Alternatively, can add libraries using linker options.
// First, Add the path to the generated library files (directory containing the `benchmark.lib`) in `[Configuration Properties > Linker > General > Additional Library Directories]`. Then do the following:
#ifdef _WIN32
#pragma comment ( lib, "Shlwapi.lib" )
#ifdef _DEBUG
#pragma comment ( lib, "benchmark.lib" )
#else
#pragma comment ( lib, "benchmark.lib" )
#endif
#endif
```
When using the static library, make sure to add `BENCHMARK_STATIC_DEFINE` under `[Configuration Properties > C/C++ > Preprocessor > Preprocessor Definitions]`
Can also use the graphical version of CMake:
* Open `CMake GUI`.
* Under `Where to build the binaries`, same path as source plus `build`.
* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`.
* Click `Configure`, `Generate`, `Open Project`.
* If build fails, try deleting entire directory and starting again, or unticking options to build less.
## Building with Intel 2015 Update 1 or Intel System Studio Update 4
See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
## Building on Solaris
If you're running benchmarks on Solaris, you'll want the kstat library linked in
too (`-lkstat`).

View File

@@ -0,0 +1,34 @@
# Building and installing Python bindings
Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
using Google Benchmark directly in Python.
Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
Supported Python versions are Python 3.8 - 3.12.
To install Google Benchmark's Python bindings, run:
```bash
python -m pip install --upgrade pip # for manylinux2014 support
python -m pip install google-benchmark
```
In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
on how to create virtual environments.
To build a wheel directly from source, you can follow these steps:
```bash
git clone https://github.com/google/benchmark.git
cd benchmark
# create a virtual environment and activate it
python3 -m venv venv --system-site-packages
source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
# upgrade Python's system-wide packages
python -m pip install --upgrade pip build
# builds the wheel and stores it in the directory "dist".
python -m build
```
NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
refer to the [Bazel installation docs](https://bazel.build/install).

View File

@@ -0,0 +1,13 @@
<a name="interleaving" />
# Random Interleaving
[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It randomly interleaves repetitions of a
microbenchmark with repetitions from other microbenchmarks in the same benchmark
test. Data shows it is able to lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.
To use, you mainly need to set `--benchmark_enable_random_interleaving=true`,
and optionally specify non-zero repetition count `--benchmark_repetitions=9`
and optionally decrease the per-repetition time `--benchmark_min_time=0.1`.

View File

@@ -0,0 +1,133 @@
# Reducing Variance
<a name="disabling-cpu-frequency-scaling" />
## Disabling CPU Frequency Scaling
If you see this warning:
```
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
```
you might want to disable the CPU frequency scaling while running the
benchmark, as well as consider other ways to stabilize the performance of
your system while benchmarking.
Exactly how to do this depends on the Linux distribution,
desktop environment, and installed programs. Specific details are a moving
target, so we will not attempt to exhaustively document them here.
One simple option is to use the `cpupower` program to change the
performance governor to "performance". This tool is maintained along with
the Linux kernel and provided by your distribution.
It must be run as root, like this:
```bash
sudo cpupower frequency-set --governor performance
```
After this you can verify that all CPUs are using the performance governor
by running this command:
```bash
cpupower frequency-info -o proc
```
The benchmarks you subsequently run will have less variance.
<a name="reducing-variance" />
## Disabling ASLR
If you see this warning:
```
***WARNING*** ASLR is enabled, the results may have unreproducible noise in them.
```
you might want to disable the ASLR security hardening feature while running the
benchmark.
The simplest way is to add
```
benchmark::MaybeReenterWithoutASLR(argc, argv);
```
as the first line of your `main()` function. It will try to disable ASLR
for the current process, and, if successful, re-execute the binary.
Note that `personality(2)` may be forbidden by e.g. seccomp (which happens
by default if you are running in a Docker container).
Note that if you link to `benchmark_main`, this is already done for you.
To globally disable ASLR on Linux, run
```
echo 0 > /proc/sys/kernel/randomize_va_space
```
To run a single benchmark with ASLR disabled on Linux, do:
```
setarch `uname -m` -R ./a_benchmark
```
Note that for the information on how to disable ASLR on other operating systems,
please refer to their documentation.
## Reducing Variance in Benchmarks
The Linux CPU frequency governor [discussed
above](#disabling-cpu-frequency-scaling) is not the only source
of noise in benchmarks. Some, but not all, of the sources of variance
include:
1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same
speed, so running a benchmark one time and then again may give a
different result depending on which CPU it ran on.
2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
AMD Turbo Core and Precision Boost, can temporarily change the CPU
frequency even when using the "performance" governor on Linux.
3. Context switching between CPUs, or scheduling competition on the CPU the
benchmark is running on.
4. Intel Hyperthreading or AMD SMT causing the same issue as above.
5. Cache effects caused by code running on other CPUs.
6. Non-uniform memory architectures (NUMA).
These can cause variance in benchmarks results within a single run
(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
program.
Reducing sources of variance is OS and architecture dependent, which is one
reason some companies maintain machines dedicated to performance testing.
Some of the easier and effective ways of reducing variance on a typical
Linux workstation are:
1. Use the performance governor as [discussed
above](#disabling-cpu-frequency-scaling).
1. Disable processor boosting by:
```sh
echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
```
See the Linux kernel's
[boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
for more information.
2. Set the benchmark program's task affinity to a fixed cpu. For example:
```sh
taskset -c 0 ./mybenchmark
```
3. Disable Hyperthreading/SMT. This can be done in the BIOS or using the
`/sys` file system (see the LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html)).
4. Close other programs that do non-trivial things based on timers, such as
your web browser, desktop environment, etc.
5. Reduce the working set of your benchmark to fit within the L1 cache, but
do be aware that this may lead you to optimize for an unrealistic
situation.
Further resources on this topic:
1. The LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html).
1. The Arch Wiki [CPU frequency
scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.

38
third_party/benchmark/docs/releasing.md vendored Normal file
View File

@@ -0,0 +1,38 @@
# How to release
* Make sure you're on main and synced to HEAD
* Ensure the project builds and tests run
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`,
and `bindings/python/google_benchmark/__init__.py` to the release version you're creating.
(This version will be used if benchmark is installed from the archive you'll be creating
in the next step.)
```
# CMakeLists.txt
project (benchmark VERSION 1.9.0 LANGUAGES CXX)
```
```
# MODULE.bazel
module(name = "com_github_google_benchmark", version="1.9.0")
```
```
# google_benchmark/__init__.py
__version__ = "1.9.0"
```
* Create a release through github's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force --tags origin`
* Confirm that the "Build and upload Python wheels" action runs to completion
* Run it manually if it hasn't run.

View File

@@ -4,7 +4,11 @@
The `compare.py` can be used to compare the result of benchmarks.
**NOTE**: the utility relies on the scipy package which can be installed using [these instructions](https://www.scipy.org/install.html).
### Dependencies
The utility relies on the [scipy](https://www.scipy.org) package which can be installed using pip:
```bash
pip3 install -r requirements.txt
```
### Displaying aggregates only
@@ -182,6 +186,146 @@ Benchmark Time CPU Time Old
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
### Note: Interpreting the output
Performance measurements are an art, and performance comparisons are doubly so.
Results are often noisy and don't necessarily have large absolute differences to
them, so just by visual inspection, it is not at all apparent if two
measurements are actually showing a performance change or not. It is even more
confusing with multiple benchmark repetitions.
Thankfully, what we can do, is use statistical tests on the results to determine
whether the performance has statistically-significantly changed. `compare.py`
uses [Mann-Whitney U
test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null
hypothesis being that there's no difference in performance.
**The below output is a summary of a benchmark comparison with statistics
provided for a multi-threaded process.**
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-----------------------------------------------------------------------------------------------------------------------------
benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
```
--------------------------------------------
Here's a breakdown of each row:
**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
the statistical test comparing the performance of the process running with one
thread. A value of 0.0000 suggests a statistically significant difference in
performance. The comparison was conducted using the U Test (Mann-Whitney
U Test) with 27 repetitions for each case.
**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
difference in mean execution time between two different cases. The negative
value (-0.1442) implies that the new process is faster by about 14.42%. The old
time was 90 units, while the new time is 77 units.
**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
relative difference in the median execution time. Again, the new process is
faster by 14.44%.
**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
difference in the standard deviation of the execution time, which is a measure
of how much variation or dispersion there is from the mean. A positive value
(+0.3974) implies there is more variance in the execution time in the new
process.
**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
Variation. It is the ratio of the standard deviation to the mean. It provides a
standardized measure of dispersion. An increase (+0.6329) indicates more
relative variability in the new process.
**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
less influenced by outliers. The negative value indicates a general improvement
in the new process. However, given the values are all zero for the old and new
times, this seems to be a mistake or placeholder in the output.
-----------------------------------------
Let's first try to see what the different columns represent in the above
`compare.py` benchmarking output:
1. **Benchmark:** The name of the function being benchmarked, along with the
size of the input (after the slash).
2. **Time:** The average time per operation, across all iterations.
3. **CPU:** The average CPU time per operation, across all iterations.
4. **Iterations:** The number of iterations the benchmark was run to get a
stable estimate.
5. **Time Old and Time New:** These represent the average time it takes for a
function to run in two different scenarios or versions. For example, you
might be comparing how fast a function runs before and after you make some
changes to it.
6. **CPU Old and CPU New:** These show the average amount of CPU time that the
function uses in two different scenarios or versions. This is similar to
Time Old and Time New, but focuses on CPU usage instead of overall time.
In the comparison section, the relative differences in both time and CPU time
are displayed for each input size.
A statistically-significant difference is determined by a **p-value**, which is
a measure of the probability that the observed difference could have occurred
just by random chance. A smaller p-value indicates stronger evidence against the
null hypothesis.
**Therefore:**
1. If the p-value is less than the chosen significance level (alpha), we
reject the null hypothesis and conclude the benchmarks are significantly
different.
2. If the p-value is greater than or equal to alpha, we fail to reject the
null hypothesis and treat the two benchmarks as similar.
The result of said statistical test is additionally communicated through color coding:
```diff
+ Green:
```
The benchmarks are _**statistically different**_. This could mean the
performance has either **significantly improved** or **significantly
deteriorated**. You should look at the actual performance numbers to see which
is the case.
```diff
- Red:
```
The benchmarks are _**statistically similar**_. This means the performance
**hasn't significantly changed**.
In statistical terms, **'green'** means we reject the null hypothesis that
there's no difference in performance, and **'red'** means we fail to reject the
null hypothesis. This might seem counter-intuitive if you're expecting 'green'
to mean 'improved performance' and 'red' to mean 'worsened performance'.
```bash
But remember, in this context:
'Success' means 'successfully finding a difference'.
'Failure' means 'failing to find a difference'.
```
Also, please note that **even if** we determine that there **is** a
statistically-significant difference between the two measurements, it does not
_necessarily_ mean that the actual benchmarks that were measured **are**
different, or vice versa, even if we determine that there is **no**
statistically-significant difference between the two measurements, it does not
necessarily mean that the actual benchmarks that were measured **are not**
different.
### U test
If there is a sufficient repetition count of the benchmarks, the tool can do

1405
third_party/benchmark/docs/user_guide.md vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,47 @@
#ifndef BENCHMARK_EXPORT_H
#define BENCHMARK_EXPORT_H
// Symbol visibility and deprecation macros for the benchmark library.
//
//   BENCHMARK_EXPORT               marks a symbol as part of the public ABI
//   BENCHMARK_NO_EXPORT            keeps a symbol out of the public ABI
//   BENCHMARK_DEPRECATED           marks a symbol as deprecated
//   BENCHMARK_DEPRECATED_EXPORT    deprecated + exported
//   BENCHMARK_DEPRECATED_NO_EXPORT deprecated + not exported
//
// Define BENCHMARK_STATIC_DEFINE when using the static library so the
// export/import decorations expand to nothing.
#if defined(_WIN32)
#define EXPORT_ATTR __declspec(dllexport)
#define IMPORT_ATTR __declspec(dllimport)
#define NO_EXPORT_ATTR
// BENCHMARK_DEPRECATED expands to DEPRECATE_ATTR, so that name must exist on
// every platform. Previously only DEPRECATED_ATTR was defined here, which left
// BENCHMARK_DEPRECATED expanding to an undefined identifier on Windows.
#define DEPRECATE_ATTR __declspec(deprecated)
// Old spelling, kept so existing users of DEPRECATED_ATTR keep compiling.
#define DEPRECATED_ATTR DEPRECATE_ATTR
#else  // _WIN32
#define EXPORT_ATTR __attribute__((visibility("default")))
#define IMPORT_ATTR __attribute__((visibility("default")))
#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
#define DEPRECATE_ATTR __attribute__((__deprecated__))
#endif  // _WIN32
#ifdef BENCHMARK_STATIC_DEFINE
#define BENCHMARK_EXPORT
#define BENCHMARK_NO_EXPORT
#else  // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_EXPORT
#ifdef benchmark_EXPORTS
/* We are building this library */
#define BENCHMARK_EXPORT EXPORT_ATTR
#else  // benchmark_EXPORTS
/* We are using this library */
#define BENCHMARK_EXPORT IMPORT_ATTR
#endif  // benchmark_EXPORTS
#endif  // !BENCHMARK_EXPORT
#ifndef BENCHMARK_NO_EXPORT
#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
#endif  // !BENCHMARK_NO_EXPORT
#endif  // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_DEPRECATED
#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
#endif  // BENCHMARK_DEPRECATED
#ifndef BENCHMARK_DEPRECATED_EXPORT
#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
#endif  // BENCHMARK_DEPRECATED_EXPORT
#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
#endif  // BENCHMARK_DEPRECATED_NO_EXPORT
#endif /* BENCHMARK_EXPORT_H */

View File

@@ -1,320 +0,0 @@
#! /usr/bin/env python
# encoding: utf-8
import argparse
import errno
import logging
import os
import platform
import re
import sys
import subprocess
import tempfile
try:
import winreg
except ImportError:
import _winreg as winreg
try:
import urllib.request as request
except ImportError:
import urllib as request
try:
import urllib.parse as parse
except ImportError:
import urlparse as parse
class EmptyLogger(object):
    '''
    Logger stand-in: every method accepts any arguments and does nothing
    '''

    def debug(self, *args, **kwargs):
        '''Discard a debug-level message'''

    def info(self, *args, **kwargs):
        '''Discard an info-level message'''

    def warn(self, *args, **kwargs):
        '''Discard a warning-level message'''

    def error(self, *args, **kwargs):
        '''Discard an error-level message'''

    def critical(self, *args, **kwargs):
        '''Discard a critical-level message'''

    def setLevel(self, *args, **kwargs):
        '''Ignore the requested log level'''
# Default mingw-builds repository index locations, used as the default `urls`
# argument of repository(). Adjacent string literals are concatenated, so the
# tuple holds two URLs.
urls = (
'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20'
'targetting%20Win32/Personal%20Builds/mingw-builds/installer/'
'repository.txt',
'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/'
'repository.txt'
)
'''
A list of mingw-build repositories
'''
def repository(urls = urls, log = EmptyLogger()):
    '''
    Downloads the mingw-builds repository files and parses them.

    urls: iterable of repository.txt URLs to fetch.
    log:  logger-like object receiving progress messages.

    Each non-empty line is read as '|'-separated fields: version, arch,
    threading model, exception model, revision ('rev<N>') and download URL.

    Returns a nested dict:
        versions[(major, minor, ...)][arch][threading][exceptions][revision]
    whose leaves are download URLs. The first URL seen for a given key wins.
    Errors from opening or reading a URL propagate to the caller.
    '''
    log.info('getting mingw-builds repository')
    versions = {}
    re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files')
    re_sub = r'http://downloads.sourceforge.net/project/\1'
    for url in urls:
        log.debug(' - requesting: %s', url)
        socket = request.urlopen(url)
        try:
            repo = socket.read()
        finally:
            # Release the connection even when the read fails.
            socket.close()
        if not isinstance(repo, str):
            repo = repo.decode()
        for entry in repo.splitlines():
            # Skip blank lines rather than assuming exactly one trailing
            # newline, so a file without one does not lose its last entry.
            if not entry.strip():
                continue
            value = entry.split('|')
            version = tuple([int(n) for n in value[0].strip().split('.')])
            version = versions.setdefault(version, {})
            arch = value[1].strip()
            # Normalise the repository's arch names to the ones used by root().
            if arch == 'x32':
                arch = 'i686'
            elif arch == 'x64':
                arch = 'x86_64'
            arch = version.setdefault(arch, {})
            threading = arch.setdefault(value[2].strip(), {})
            exceptions = threading.setdefault(value[3].strip(), {})
            # The revision field looks like 'rev<N>'; keep only the number.
            exceptions.setdefault(int(value[4].strip()[3:]),
                re_sourceforge.sub(re_sub, value[5].strip()))
    return versions
def find_in_path(file, path=None):
    '''
    Attempts to find an executable in the path.

    file: executable name; on Windows '.exe' is appended when missing.
    path: directories to search, either a list or a single string separated
          by os.pathsep. Defaults to the PATH environment variable.

    Returns the list of existing candidate paths, in search order (empty
    when nothing is found).
    '''
    # Only add the extension when it is not already there, so '7z.exe' does
    # not become '7z.exe.exe'.
    if platform.system() == 'Windows' and not file.lower().endswith('.exe'):
        file += '.exe'
    if path is None:
        path = os.environ.get('PATH', '')
    if isinstance(path, str):
        path = path.split(os.pathsep)
    candidates = (os.path.join(directory, file) for directory in path)
    return [candidate for candidate in candidates if os.path.exists(candidate)]
def find_7zip(log = EmptyLogger()):
    '''
    Attempts to find 7zip for unpacking the mingw-build archives.

    Looks for '7z' on PATH first and falls back to the 'Path' value stored
    under HKEY_LOCAL_MACHINE\\SOFTWARE\\7-Zip in the registry.

    Returns the path of the 7z executable. Registry lookup errors propagate
    to the caller.
    '''
    log.info('finding 7zip')
    path = find_in_path('7z')
    if not path:
        # The key handle is a context manager, so it is closed even when the
        # value query raises.
        with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip') as key:
            install_dir, _ = winreg.QueryValueEx(key, 'Path')
        path = [os.path.join(install_dir, '7z.exe')]
    log.debug('found \'%s\'', path[0])
    return path[0]

# NOTE(review): this call runs at import time and its result is discarded, so
# its only visible effect is to raise if 7zip cannot be located. Kept to
# preserve existing import-time behaviour -- confirm whether it is intentional.
find_7zip()
def unpack(archive, location, log = EmptyLogger()):
    '''
    Extracts a mingw-builds archive into `location` by running 7zip.

    The 7zip output on stdout is discarded; a non-zero exit status raises
    subprocess.CalledProcessError.
    '''
    seven_zip_exe = find_7zip(log)
    archive_name = os.path.basename(archive)
    log.info('unpacking %s', archive_name)
    # 'x' extracts with full paths, '-o<dir>' sets the output directory and
    # '-y' answers yes to every prompt.
    command = [seven_zip_exe, 'x', archive, '-o' + location, '-y']
    log.debug(' - %r', command)
    with open(os.devnull, 'w') as sink:
        subprocess.check_call(command, stdout = sink)
def download(url, location, log = EmptyLogger()):
    '''
    Downloads and unpacks a mingw-builds archive.

    url:      address of the archive to fetch.
    location: directory the archive is downloaded to and unpacked in; it is
              created when missing.
    log:      logger-like object receiving progress messages.

    Returns the path of the unpacked 'mingw64' directory, or 'mingw32' when
    the former does not exist. Raises ValueError when neither is present
    after unpacking.
    '''
    log.info('downloading MinGW')
    log.debug(' - url: %s', url)
    log.debug(' - location: %s', location)
    re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*')
    stream = request.urlopen(url)
    try:
        # Python 3 responses expose getheader(); Python 2 keeps it on .headers.
        try:
            content = stream.getheader('Content-Disposition') or ''
        except AttributeError:
            content = stream.headers.getheader('Content-Disposition') or ''
        matches = re_content.match(content)
        filename = ''
        if matches:
            # The header is controlled by the server: keep only the final path
            # component so the archive cannot be written outside `location`.
            filename = os.path.basename(matches.group(2))
        if not filename:
            # No usable header name: fall back to the last segment of the
            # (possibly redirected) URL.
            parsed = parse.urlparse(stream.geturl())
            filename = os.path.basename(parsed.path)
        try:
            os.makedirs(location)
        except OSError as e:
            # An already existing directory is fine; anything else is not.
            if e.errno == errno.EEXIST and os.path.isdir(location):
                pass
            else:
                raise
        archive = os.path.join(location, filename)
        with open(archive, 'wb') as out:
            while True:
                buf = stream.read(1024)
                if not buf:
                    break
                out.write(buf)
    finally:
        # Release the connection whether or not the transfer succeeded.
        stream.close()
    unpack(archive, location, log = log)
    os.remove(archive)
    possible = os.path.join(location, 'mingw64')
    if not os.path.exists(possible):
        possible = os.path.join(location, 'mingw32')
        if not os.path.exists(possible):
            raise ValueError('Failed to find unpacked MinGW: ' + possible)
    return possible
def root(location = None, arch = None, version = None, threading = None,
        exceptions = None, revision = None, log = EmptyLogger()):
    '''
    Returns the root folder of a specific version of the mingw-builds variant
    of gcc. Will download the compiler if needed

    location   -- base download directory (defaults to a ``mingw-builds``
                  folder inside the system temp directory)
    arch       -- 'i686' or 'x86_64' (defaults to the current machine)
    version    -- GCC version tuple (defaults to the highest one listed)
    threading  -- threading model; 'posix' is preferred, then 'win32'
    exceptions -- exception model; 'seh' is preferred, then 'sjlj'
    revision   -- release revision (defaults to the highest one listed)
    log        -- logger receiving progress messages

    Raises ValueError for an unknown architecture or when the unpacked
    folder is not where it was expected.
    '''
    # The download url is always read from the repository listing, so the
    # listing is required even when the caller supplied every parameter.
    versions = repository(log = log)

    # Determine some defaults
    version = version or max(versions.keys())
    if not arch:
        arch = platform.machine().lower()
        if arch == 'x86':
            arch = 'i686'
        elif arch == 'amd64':
            arch = 'x86_64'
    if not threading:
        keys = versions[version][arch].keys()
        if 'posix' in keys:
            threading = 'posix'
        elif 'win32' in keys:
            threading = 'win32'
        else:
            # keys() may be a view that cannot be indexed directly.
            threading = list(keys)[0]
    if not exceptions:
        keys = versions[version][arch][threading].keys()
        if 'seh' in keys:
            exceptions = 'seh'
        elif 'sjlj' in keys:
            exceptions = 'sjlj'
        else:
            exceptions = list(keys)[0]
    if revision is None:
        revision = max(versions[version][arch][threading][exceptions].keys())
    if not location:
        location = os.path.join(tempfile.gettempdir(), 'mingw-builds')

    # Get the download url
    url = versions[version][arch][threading][exceptions][revision]

    # Tell the user whatzzup
    log.info('finding MinGW %s', '.'.join(str(v) for v in version))
    log.debug(' - arch: %s', arch)
    log.debug(' - threading: %s', threading)
    log.debug(' - exceptions: %s', exceptions)
    log.debug(' - revision: %s', revision)
    log.debug(' - url: %s', url)

    # Store each specific revision differently
    slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}'
    slug = slug.format(
        version = '.'.join(str(v) for v in version),
        arch = arch,
        threading = threading,
        exceptions = exceptions,
        revision = revision
    )
    if arch == 'x86_64':
        root_dir = os.path.join(location, slug, 'mingw64')
    elif arch == 'i686':
        root_dir = os.path.join(location, slug, 'mingw32')
    else:
        raise ValueError('Unknown MinGW arch: ' + arch)

    # Download if needed
    if not os.path.exists(root_dir):
        downloaded = download(url, os.path.join(location, slug), log = log)
        if downloaded != root_dir:
            raise ValueError('The location of mingw did not match\n%s\n%s'
                % (downloaded, root_dir))

    return root_dir
def str2ver(string):
    '''
    Converts a version string into a tuple

    Accepts exactly three dot-separated integers (for example '4.9.2') and
    returns them as a tuple of ints. Any other input raises
    argparse.ArgumentTypeError.
    '''
    try:
        version = tuple(int(v) for v in string.split('.'))
        # Compare by value: an identity check against an int literal is not
        # a reliable way to test the length.
        if len(version) != 3:
            raise ValueError()
    except ValueError:
        raise argparse.ArgumentTypeError(
            'please provide a three digit version string')
    return version
def main():
    '''
    Command line entry point: parses the arguments, configures the 'mingw'
    logger and writes the ``bin`` folder of the requested MinGW to stdout.
    '''
    default_location = os.path.join(tempfile.gettempdir(), 'mingw-builds')

    parser = argparse.ArgumentParser(
        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
        description = 'Downloads a specific version of MinGW'
    )
    parser.add_argument('--location', default = default_location,
        help = 'the location to download the compiler to')
    parser.add_argument('--arch', choices = ['i686', 'x86_64'], required = True,
        help = 'the target MinGW architecture string')
    parser.add_argument('--version', type = str2ver,
        help = 'the version of GCC to download')
    parser.add_argument('--threading', choices = ['posix', 'win32'],
        help = 'the threading type of the compiler')
    parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'],
        help = 'the method to throw exceptions')
    parser.add_argument('--revision', type=int,
        help = 'the revision of the MinGW release')
    verbosity = parser.add_mutually_exclusive_group()
    verbosity.add_argument('-v', '--verbose', action='store_true',
        help='increase the script output verbosity')
    verbosity.add_argument('-q', '--quiet', action='store_true',
        help='only print errors and warning')
    options = parser.parse_args()

    # Create the logger; messages are printed without any decoration
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    logger = logging.getLogger('mingw')
    logger.addHandler(stream_handler)
    if options.verbose:
        level = logging.DEBUG
    elif options.quiet:
        level = logging.WARN
    else:
        level = logging.INFO
    logger.setLevel(level)

    # Get MinGW
    mingw_root = root(
        location = options.location,
        arch = options.arch,
        version = options.version,
        threading = options.threading,
        exceptions = options.exceptions,
        revision = options.revision,
        log = logger)

    sys.stdout.write('%s\n' % os.path.join(mingw_root, 'bin'))
if __name__ == '__main__':
    # Run the script; report the handled failures on stderr and exit with 1.
    failure = None
    try:
        main()
    except IOError as err:
        failure = 'IO error: %s\n' % err
    except OSError as err:
        failure = 'OS error: %s\n' % err
    except KeyboardInterrupt:
        failure = 'Killed\n'
    if failure is not None:
        sys.stderr.write(failure)
        sys.exit(1)

78
third_party/benchmark/pyproject.toml vendored Normal file
View File

@@ -0,0 +1,78 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project]
name = "google_benchmark"
description = "A library to benchmark code snippets."
requires-python = ">=3.10"
license = { file = "LICENSE" }
keywords = ["benchmark"]
authors = [{ name = "Google", email = "benchmark-discuss@googlegroups.com" }]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Testing",
"Topic :: System :: Benchmark",
]
dynamic = ["readme", "version"]
dependencies = ["absl-py>=0.7.1"]
[project.optional-dependencies]
dev = ["pre-commit>=3.3.3"]
[project.urls]
Homepage = "https://github.com/google/benchmark"
Documentation = "https://github.com/google/benchmark/tree/main/docs"
Repository = "https://github.com/google/benchmark.git"
Discord = "https://discord.gg/cz7UX7wKC2"
[tool.setuptools]
package-dir = { "" = "bindings/python" }
zip-safe = false
[tool.setuptools.packages.find]
where = ["bindings/python"]
[tool.setuptools.dynamic]
readme = { file = "README.md", content-type = "text/markdown" }
version = { attr = "google_benchmark.__version__" }
[tool.mypy]
check_untyped_defs = true
disallow_incomplete_defs = true
pretty = true
python_version = "3.11"
strict_optional = false
warn_unreachable = true
[[tool.mypy.overrides]]
module = ["yaml"]
ignore_missing_imports = true
[tool.ruff]
# explicitly tell ruff the source directory to correctly identify first-party package.
src = ["bindings/python"]
line-length = 80
target-version = "py311"
[tool.ruff.lint]
# Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default.
select = ["ASYNC", "B", "C4", "C90", "E", "F", "I", "PERF", "PIE", "PT018", "RUF", "SIM", "UP", "W"]
ignore = [
"PLW2901", # redefined-loop-name
"UP031", # printf-string-formatting
]
[tool.ruff.lint.isort]
combine-as-imports = true

View File

@@ -1,16 +0,0 @@
# How to release
* Make sure you're on master and synced to HEAD
* Ensure the project builds and tests run (sanity check only, obviously)
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create a release through github's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force origin`

166
third_party/benchmark/setup.py vendored Normal file
View File

@@ -0,0 +1,166 @@
import contextlib
import os
import platform
import re
import shutil
import sys
from collections.abc import Generator
from pathlib import Path
from typing import Any
import setuptools
from setuptools.command import build_ext
IS_WINDOWS = platform.system() == "Windows"
IS_MAC = platform.system() == "Darwin"
IS_LINUX = platform.system() == "Linux"
# hardcoded SABI-related options. Requires that each Python interpreter
# (hermetic or not) participating is of the same major-minor version.
py_limited_api = sys.version_info >= (3, 12)
options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {}
def is_cibuildwheel() -> bool:
    """Report whether the ``CIBUILDWHEEL`` environment variable is set."""
    return "CIBUILDWHEEL" in os.environ
@contextlib.contextmanager
def _maybe_patch_toolchains() -> Generator[None, None, None]:
    """
    Temporarily add ``ignore_root_user_error = True`` to every
    ``python.toolchain(...)`` call in ``MODULE.bazel``.

    The file is only rewritten when running under cibuildwheel on Linux, and
    its original content is written back when the context exits.
    """

    def fmt_toolchain_args(matchobj):
        extra = "ignore_root_user_error = True"
        args = matchobj.group(1)
        if args.endswith("\n"):
            # toolchain def is broken over multiple lines
            patched = f"{args} {extra},\n"
        else:
            # toolchain def is on one line.
            patched = f"{args}, {extra}"
        return f"python.toolchain({patched})"

    needs_patch = is_cibuildwheel() and IS_LINUX
    module_file = Path("MODULE.bazel")
    original: str = module_file.read_text()

    if not needs_patch:
        yield
        return

    try:
        patched_text = re.sub(
            r"python.toolchain\(([\w\"\s,.=]*)\)",
            fmt_toolchain_args,
            original,
        )
        module_file.write_text(patched_text)
        yield
    finally:
        module_file.write_text(original)
class BazelExtension(setuptools.Extension):
    """Extension whose build is delegated to a Bazel BUILD target."""

    def __init__(self, name: str, bazel_target: str, **kwargs: Any):
        super().__init__(name=name, sources=[], **kwargs)

        # Keep the text after the last "//" and split it into "path:target".
        *_, label = bazel_target.split("//")
        relpath, target_name = label.split(":")

        self.bazel_target = bazel_target
        self.relpath = relpath
        self.target_name = target_name
class BuildBazelExtension(build_ext.build_ext):
    """build_ext command that builds each extension by invoking Bazel."""

    def run(self):
        for extension in self.extensions:
            self.bazel_build(extension)
        # explicitly call `bazel shutdown` for graceful exit
        self.spawn(["bazel", "shutdown"])

    def copy_extensions_to_source(self):
        """
        Intentionally empty: ``bazel_build`` already copies the generated
        extensions, so the `build_ext` base class step is not repeated.
        """

    def bazel_build(self, ext: BazelExtension) -> None:  # noqa: C901
        """Run Bazel for ``ext`` and copy its artifacts into the build lib."""
        build_temp = Path(self.build_temp)

        # We round to the minor version, which makes rules_python
        # look up the latest available patch version internally.
        major, minor = sys.version_info[:2]
        python_version = f"{major}.{minor}"

        compilation_mode = "dbg" if self.debug else "opt"
        # C++17 is required by nanobind
        cxx_standard = "/std:c++17" if IS_WINDOWS else "-std=c++17"

        command = [
            "bazel",
            "run",
            ext.bazel_target,
            f"--symlink_prefix={build_temp / 'bazel-'}",
            f"--compilation_mode={compilation_mode}",
            f"--cxxopt={cxx_standard}",
            f"--@rules_python//python/config_settings:python_version={python_version}",
        ]

        if ext.py_limited_api:
            command.append("--@nanobind_bazel//:py-limited-api=cp312")

        if IS_WINDOWS:
            # Link with python*.lib.
            command.extend(
                "--linkopt=/LIBPATH:" + library_dir
                for library_dir in self.library_dirs
            )
        elif IS_MAC:
            # C++17 needs macOS 10.14 at minimum
            command.append("--macos_minimum_os=10.14")

        with _maybe_patch_toolchains():
            self.spawn(command)

        if IS_WINDOWS:
            suffix = ".pyd"
        elif ext.py_limited_api:
            suffix = ".abi3.so"
        else:
            suffix = ".so"

        # copy the Bazel build artifacts into setuptools' libdir,
        # from where the wheel is built.
        package_name = "google_benchmark"
        package_root = Path("bindings") / "python" / "google_benchmark"
        source_dir = build_temp / "bazel-bin" / package_root
        target_dir = Path(self.build_lib) / package_name
        for root, dirs, files in os.walk(source_dir, topdown=True):
            # exclude runfiles directories and children.
            dirs[:] = [name for name in dirs if "runfiles" not in name]

            at_package_root = Path(root) == source_dir
            for filename in files:
                file_path = Path(filename)
                # we do not want the bare .so file included
                # when building for ABI3, so we require a
                # full and exact match on the file extension.
                is_artifact = (
                    "".join(file_path.suffixes) == suffix
                    or file_path.suffix == ".pyi"
                )
                # copy py.typed, but only at the package root.
                is_typing_marker = at_package_root and filename == "py.typed"
                if is_artifact or is_typing_marker:
                    shutil.copyfile(root / file_path, target_dir / file_path)
# Package entry point: `build_ext` is replaced by BuildBazelExtension and the
# single extension module is described by a BazelExtension naming its Bazel
# target. `options` and `py_limited_api` are the module-level values defined
# near the top of this file.
setuptools.setup(
cmdclass={"build_ext": BuildBazelExtension},
package_data={"google_benchmark": ["py.typed", "*.pyi"]},
ext_modules=[
BazelExtension(
name="google_benchmark._benchmark",
bazel_target="//bindings/python/google_benchmark:benchmark_stubgen",
py_limited_api=py_limited_api,
)
],
options=options,
)

View File

@@ -1,4 +1,5 @@
# Allow the source files to find headers in src/
# Allow the source files to find headers in src/
include(GNUInstallDirs)
include_directories(${PROJECT_SOURCE_DIR}/src)
if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
@@ -17,92 +18,166 @@ foreach(item ${BENCHMARK_MAIN})
endforeach()
add_library(benchmark ${SOURCE_FILES})
add_library(benchmark::benchmark ALIAS benchmark)
set_target_properties(benchmark PROPERTIES
OUTPUT_NAME "benchmark"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
)
set_property(
SOURCE benchmark.cc
APPEND
PROPERTY COMPILE_DEFINITIONS
BENCHMARK_VERSION="${VERSION}"
)
# libpfm, if available
if (PFM_FOUND)
target_link_libraries(benchmark PRIVATE PFM::libpfm)
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
install(
FILES "${PROJECT_SOURCE_DIR}/cmake/Modules/FindPFM.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
# pthread affinity, if available
if(HAVE_PTHREAD_AFFINITY)
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
endif()
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
find_library(LIBRT rt)
if(LIBRT)
target_link_libraries(benchmark ${LIBRT})
endif()
target_link_libraries(benchmark PRIVATE Threads::Threads)
target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES})
if(HAVE_LIB_RT)
target_link_libraries(benchmark PRIVATE rt)
endif(HAVE_LIB_RT)
# We need extra libraries on Windows
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
target_link_libraries(benchmark Shlwapi)
target_link_libraries(benchmark PRIVATE shlwapi)
endif()
# We need extra libraries on Solaris
if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
target_link_libraries(benchmark kstat)
target_link_libraries(benchmark PRIVATE kstat)
set(BENCHMARK_PRIVATE_LINK_LIBRARIES -lkstat)
endif()
if (NOT BUILD_SHARED_LIBS)
target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
endif()
# Benchmark main library
add_library(benchmark_main "benchmark_main.cc")
add_library(benchmark::benchmark_main ALIAS benchmark_main)
set_target_properties(benchmark_main PROPERTIES
OUTPUT_NAME "benchmark_main"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
DEFINE_SYMBOL benchmark_EXPORTS
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
target_link_libraries(benchmark_main benchmark)
target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
set(include_install_dir "include")
set(lib_install_dir "lib/")
set(bin_install_dir "bin/")
set(config_install_dir "lib/cmake/${PROJECT_NAME}")
set(pkgconfig_install_dir "lib/pkgconfig")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(generated_dir "${PROJECT_BINARY_DIR}")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
set(pkg_config_main "${generated_dir}/${PROJECT_NAME}_main.pc")
set(targets_to_export benchmark benchmark_main)
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
include(CMakePackageConfigHelpers)
configure_package_config_file (
${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in
${project_config}
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
NO_SET_AND_CHECK_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
write_basic_package_version_file(
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion
)
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark_main.pc.in" "${pkg_config_main}" @ONLY)
export (
TARGETS ${targets_to_export}
NAMESPACE "${namespace}"
FILE ${generated_dir}/${targets_export_name}.cmake
)
if (BENCHMARK_ENABLE_INSTALL)
# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
install(
TARGETS benchmark benchmark_main
TARGETS ${targets_to_export}
EXPORT ${targets_export_name}
ARCHIVE DESTINATION ${lib_install_dir}
LIBRARY DESTINATION ${lib_install_dir}
RUNTIME DESTINATION ${bin_install_dir}
INCLUDES DESTINATION ${include_install_dir})
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
DESTINATION ${include_install_dir}
"${PROJECT_BINARY_DIR}/include/benchmark"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.*h")
install(
FILES "${project_config}" "${version_config}"
DESTINATION "${config_install_dir}")
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
install(
FILES "${pkg_config}"
DESTINATION "${pkgconfig_install_dir}")
FILES "${pkg_config}" "${pkg_config_main}"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(
EXPORT "${targets_export_name}"
NAMESPACE "${namespace}"
DESTINATION "${config_install_dir}")
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
if (BENCHMARK_ENABLE_DOXYGEN)
find_package(Doxygen REQUIRED)
set(DOXYGEN_QUIET YES)
set(DOXYGEN_RECURSIVE YES)
set(DOXYGEN_GENERATE_HTML YES)
set(DOXYGEN_GENERATE_MAN NO)
set(DOXYGEN_MARKDOWN_SUPPORT YES)
set(DOXYGEN_BUILTIN_STL_SUPPORT YES)
set(DOXYGEN_EXTRACT_PACKAGE YES)
set(DOXYGEN_EXTRACT_STATIC YES)
set(DOXYGEN_SHOW_INCLUDE_FILES YES)
set(DOXYGEN_BINARY_TOC YES)
set(DOXYGEN_TOC_EXPAND YES)
set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md")
doxygen_add_docs(benchmark_doxygen
docs
include
src
ALL
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "Building documentation with Doxygen.")
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
else()
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/docs/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
endif()

File diff suppressed because it is too large Load Diff

View File

@@ -1,15 +1,118 @@
#include "benchmark_api_internal.h"
#include <cinttypes>
#include "string_util.h"
namespace benchmark {
namespace internal {
// Describes one runnable instance of `benchmark` for the given argument set
// and thread count. The family's settings are captured in the member
// initializers; the body then fills in the individual components of the
// instance name (arguments, timing options, repetitions, threads).
BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
                                     int per_family_instance_idx,
                                     const std::vector<int64_t>& args,
                                     int thread_count)
    : benchmark_(*benchmark),
      family_index_(family_idx),
      per_family_instance_index_(per_family_instance_idx),
      aggregation_report_mode_(benchmark_.aggregation_report_mode_),
      args_(args),
      time_unit_(benchmark_.GetTimeUnit()),
      measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
      use_real_time_(benchmark_.use_real_time_),
      use_manual_time_(benchmark_.use_manual_time_),
      complexity_(benchmark_.complexity_),
      complexity_lambda_(benchmark_.complexity_lambda_),
      statistics_(benchmark_.statistics_),
      repetitions_(benchmark_.repetitions_),
      min_time_(benchmark_.min_time_),
      min_warmup_time_(benchmark_.min_warmup_time_),
      iterations_(benchmark_.iterations_),
      threads_(thread_count),
      setup_(benchmark_.setup_),
      teardown_(benchmark_.teardown_) {
  name_.function_name = benchmark_.name_;

  // Arguments are rendered as "<name>:<value>" when a non-empty name exists
  // for that position and as "<value>" otherwise, separated by '/'.
  const auto& arg_names = benchmark_.arg_names_;
  for (size_t i = 0; i < args.size(); ++i) {
    if (!name_.args.empty()) {
      name_.args += '/';
    }
    if (i < arg_names.size() && !arg_names[i].empty()) {
      name_.args += StrFormat("%s:", arg_names[i].c_str());
    }
    name_.args += StrFormat("%" PRId64, args[i]);
  }

  // Optional components are only filled in when the family overrides them.
  if (!IsZero(benchmark_.min_time_)) {
    name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
  }

  if (!IsZero(benchmark_.min_warmup_time_)) {
    name_.min_warmup_time =
        StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
  }

  if (benchmark_.iterations_ != 0) {
    const auto iteration_count =
        static_cast<unsigned long>(benchmark_.iterations_);
    name_.iterations = StrFormat("iterations:%lu", iteration_count);
  }

  if (benchmark_.repetitions_ != 0) {
    name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
  }

  if (benchmark_.measure_process_cpu_time_) {
    name_.time_type = "process_time";
  }

  // Manual time takes precedence over real time; at most one is appended.
  const char* wall_time_type = nullptr;
  if (benchmark_.use_manual_time_) {
    wall_time_type = "manual_time";
  } else if (benchmark_.use_real_time_) {
    wall_time_type = "real_time";
  }
  if (wall_time_type != nullptr) {
    if (!name_.time_type.empty()) {
      name_.time_type += '/';
    }
    name_.time_type += wall_time_type;
  }

  if (!benchmark_.thread_counts_.empty()) {
    name_.threads = StrFormat("threads:%d", threads_);
  }
}
State BenchmarkInstance::Run(
size_t iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager) const {
State st(iters, arg, thread_id, threads, timer, manager);
benchmark->Run(st);
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const {
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
manager, perf_counters_measurement, profiler_manager);
benchmark_.Run(st);
return st;
}
} // internal
} // benchmark
void BenchmarkInstance::Setup() const {
if (setup_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
teardown_(st);
}
}
} // namespace internal
} // namespace benchmark

View File

@@ -1,9 +1,6 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
#include <cmath>
#include <iosfwd>
#include <limits>
@@ -11,31 +8,71 @@
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
namespace benchmark {
namespace internal {
// Information kept per benchmark we may want to run
struct BenchmarkInstance {
std::string name;
Benchmark* benchmark;
AggregationReportMode aggregation_report_mode;
std::vector<int64_t> arg;
TimeUnit time_unit;
int range_multiplier;
bool use_real_time;
bool use_manual_time;
BigO complexity;
BigOFunc* complexity_lambda;
UserCounters counters;
const std::vector<Statistics>* statistics;
bool last_benchmark_instance;
int repetitions;
double min_time;
size_t iterations;
int threads; // Number of concurrent threads to us
class BenchmarkInstance {
public:
BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args, int thread_count);
State Run(size_t iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager) const;
const BenchmarkName& name() const { return name_; }
int family_index() const { return family_index_; }
int per_family_instance_index() const { return per_family_instance_index_; }
AggregationReportMode aggregation_report_mode() const {
return aggregation_report_mode_;
}
TimeUnit time_unit() const { return time_unit_; }
bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
bool use_real_time() const { return use_real_time_; }
bool use_manual_time() const { return use_manual_time_; }
BigO complexity() const { return complexity_; }
BigOFunc* complexity_lambda() const { return complexity_lambda_; }
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
const auto& GetUserThreadRunnerFactory() const {
return benchmark_.threadrunner_;
}
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const;
private:
BenchmarkName name_;
Benchmark& benchmark_;
const int family_index_;
const int per_family_instance_index_;
AggregationReportMode aggregation_report_mode_;
const std::vector<int64_t>& args_;
TimeUnit time_unit_;
bool measure_process_cpu_time_;
bool use_real_time_;
bool use_manual_time_;
BigO complexity_;
BigOFunc* complexity_lambda_;
UserCounters counters_;
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to use
callback_function setup_;
callback_function teardown_;
};
bool FindBenchmarksInternal(const std::string& re,
@@ -44,6 +81,7 @@ bool FindBenchmarksInternal(const std::string& re,
bool IsZero(double n);
BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal

View File

@@ -14,4 +14,5 @@
#include "benchmark/benchmark.h"
BENCHMARK_EXPORT int main(int /*argc*/, char** /*argv*/);
BENCHMARK_MAIN();

View File

@@ -0,0 +1,59 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <benchmark/benchmark.h>
namespace benchmark {
namespace {
// Sum of the sizes of every string in a pack.
// Base case: an empty pack has no characters.
size_t size_impl() { return 0; }

template <typename Head, typename... Tail>
size_t size_impl(const Head& head, const Tail&... tail) {
  const size_t remaining = size_impl(tail...);
  return remaining + head.size();
}

// Append a pack of strings to `s`, inserting `delimiter` between two
// neighbours only when both sides are non-empty.
// TODO(dominic): use absl::StrJoin
// Base case: nothing left to append.
void join_impl(std::string& /*unused*/, char /*unused*/) {}

template <typename Head, typename... Tail>
void join_impl(std::string& s, const char delimiter, const Head& head,
               const Tail&... tail) {
  const bool needs_delimiter = !(s.empty() || head.empty());
  if (needs_delimiter) {
    s.push_back(delimiter);
  }
  s += head;
  join_impl(s, delimiter, tail...);
}

// Join a pack of strings with `delimiter`, skipping empty components.
template <typename... Ts>
std::string join(char delimiter, const Ts&... ts) {
  // Upper bound on the result: all characters plus one delimiter per part.
  const size_t capacity = sizeof...(Ts) + size_impl(ts...);
  std::string joined;
  joined.reserve(capacity);
  join_impl(joined, delimiter, ts...);
  return joined;
}
} // namespace
// Renders the full benchmark name by joining the name components, in the
// order listed below, with '/'.
BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
  constexpr char kDelimiter = '/';
  return join(kDelimiter, function_name, args, min_time, min_warmup_time,
              iterations, repetitions, time_type, threads);
}
} // namespace benchmark

View File

@@ -15,7 +15,7 @@
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -24,6 +24,7 @@
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
@@ -31,6 +32,7 @@
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <thread>
@@ -51,10 +53,13 @@ namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;
constexpr int kRangeMultiplier = 8;
// The size of a benchmark family is the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
static const size_t kMaxFamilySize = 100;
constexpr size_t kMaxFamilySize = 100;
constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@@ -77,7 +82,7 @@ class BenchmarkFamilies {
// Extract the list of benchmark instances that match the specified
// regular expression.
bool FindBenchmarks(std::string re,
bool FindBenchmarks(std::string spec,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err);
@@ -109,28 +114,35 @@ void BenchmarkFamilies::ClearBenchmarks() {
bool BenchmarkFamilies::FindBenchmarks(
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
std::ostream* ErrStream) {
CHECK(ErrStream);
BM_CHECK(ErrStream);
auto& Err = *ErrStream;
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
bool isNegativeFilter = false;
bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
isNegativeFilter = true;
is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
Err << "Could not compile benchmark re: " << error_msg << '\n';
return false;
}
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};
int next_family_index = 0;
MutexLock l(mutex_);
for (std::unique_ptr<Benchmark>& family : families_) {
int family_index = next_family_index;
int per_family_instance_index = 0;
// Family was deleted or benchmark doesn't match
if (!family) continue;
if (!family) {
continue;
}
if (family->ArgsCnt() == -1) {
family->Args({});
@@ -147,67 +159,31 @@ bool BenchmarkFamilies::FindBenchmarks(
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
// family size.
if (spec == ".") benchmarks->reserve(family_size);
// family size. this doesn't take into account any disabled benchmarks
// so worst case we reserve more than we need.
if (spec == ".") {
benchmarks->reserve(benchmarks->size() + family_size);
}
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
BenchmarkInstance instance;
instance.name = family->name_;
instance.benchmark = family.get();
instance.aggregation_report_mode = family->aggregation_report_mode_;
instance.arg = args;
instance.time_unit = family->time_unit_;
instance.range_multiplier = family->range_multiplier_;
instance.min_time = family->min_time_;
instance.iterations = family->iterations_;
instance.repetitions = family->repetitions_;
instance.use_real_time = family->use_real_time_;
instance.use_manual_time = family->use_manual_time_;
instance.complexity = family->complexity_;
instance.complexity_lambda = family->complexity_lambda_;
instance.statistics = &family->statistics_;
instance.threads = num_threads;
BenchmarkInstance instance(family.get(), family_index,
per_family_instance_index, args,
num_threads);
// Add arguments to instance name
size_t arg_i = 0;
for (auto const& arg : args) {
instance.name += "/";
if (arg_i < family->arg_names_.size()) {
const auto& arg_name = family->arg_names_[arg_i];
if (!arg_name.empty()) {
instance.name +=
StrFormat("%s:", family->arg_names_[arg_i].c_str());
}
}
instance.name += StrFormat("%d", arg);
++arg_i;
}
if (!IsZero(family->min_time_))
instance.name += StrFormat("/min_time:%0.3f", family->min_time_);
if (family->iterations_ != 0)
instance.name += StrFormat("/iterations:%d", family->iterations_);
if (family->repetitions_ != 0)
instance.name += StrFormat("/repeats:%d", family->repetitions_);
if (family->use_manual_time_) {
instance.name += "/manual_time";
} else if (family->use_real_time_) {
instance.name += "/real_time";
}
// Add the number of threads used to the name
if (!family->thread_counts_.empty()) {
instance.name += StrFormat("/threads:%d", instance.threads);
}
if ((re.Match(instance.name) && !isNegativeFilter) ||
(!re.Match(instance.name) && isNegativeFilter)) {
instance.last_benchmark_instance = (&args == &family->args_.back());
const auto full_name = instance.name().str();
if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
((re.Match(full_name) && !is_negative_filter) ||
(!re.Match(full_name) && is_negative_filter))) {
benchmarks->push_back(std::move(instance));
++per_family_instance_index;
// Only bump the next family index once we've established that
// at least one instance of this family will be run.
if (next_family_index == family_index) {
++next_family_index;
}
}
}
}
@@ -215,11 +191,11 @@ bool BenchmarkFamilies::FindBenchmarks(
return true;
}
Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
std::unique_ptr<Benchmark> bench_ptr(bench);
Benchmark* RegisterBenchmarkInternal(std::unique_ptr<Benchmark> bench) {
Benchmark* bench_ptr = bench.get();
BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
families->AddBenchmark(std::move(bench_ptr));
return bench;
families->AddBenchmark(std::move(bench));
return bench_ptr;
}
// FIXME: This function is a hack so that benchmark.cc can access
@@ -234,14 +210,17 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
Benchmark::Benchmark(const char* name)
Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(kNanosecond),
time_unit_(GetDefaultTimeUnit()),
use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
@@ -249,23 +228,30 @@ Benchmark::Benchmark(const char* name)
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
ComputeStatistics("cv", StatisticsCV, kPercentage);
}
// Out-of-line destructor; it performs no work beyond destroying the members.
Benchmark::~Benchmark() = default;
// Replaces this benchmark's name (delegating to SetName) and returns `this`
// so that further configuration calls can be chained.
Benchmark* Benchmark::Name(const std::string& name) {
  this->SetName(name);
  return this;
}
Benchmark* Benchmark::Arg(int64_t x) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
return this;
}
// Selects an explicit time unit for this benchmark and returns `this` for
// chaining. Clearing use_default_time_unit_ is what makes GetTimeUnit()
// return `unit` instead of GetDefaultTimeUnit().
Benchmark* Benchmark::Unit(TimeUnit unit) {
  this->use_default_time_unit_ = false;
  this->time_unit_ = unit;
  return this;
}
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int64_t> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
@@ -277,54 +263,61 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
Benchmark* Benchmark::Ranges(
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
std::vector<std::vector<int64_t>> arglists(ranges.size());
std::size_t total = 1;
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
range_multiplier_);
total *= arglists[i].size();
}
std::vector<std::size_t> ctr(arglists.size(), 0);
ArgsProduct(arglists);
return this;
}
Benchmark* Benchmark::ArgsProduct(
const std::vector<std::vector<int64_t>>& arglists) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
std::vector<std::size_t> indices(arglists.size());
const std::size_t total = std::accumulate(
std::begin(arglists), std::end(arglists), std::size_t{1},
[](const std::size_t res, const std::vector<int64_t>& arglist) {
return res * arglist.size();
});
std::vector<int64_t> args;
args.reserve(arglists.size());
for (std::size_t i = 0; i < total; i++) {
std::vector<int64_t> tmp;
tmp.reserve(arglists.size());
for (std::size_t j = 0; j < arglists.size(); j++) {
tmp.push_back(arglists[j].at(ctr[j]));
for (std::size_t arg = 0; arg < arglists.size(); arg++) {
args.push_back(arglists[arg][indices[arg]]);
}
args_.push_back(args);
args.clear();
args_.push_back(std::move(tmp));
for (std::size_t j = 0; j < arglists.size(); j++) {
if (ctr[j] + 1 < arglists[j].size()) {
++ctr[j];
break;
}
ctr[j] = 0;
}
std::size_t arg = 0;
do {
indices[arg] = (indices[arg] + 1) % arglists[arg].size();
} while (indices[arg++] == 0 && arg < arglists.size());
}
return this;
}
Benchmark* Benchmark::ArgName(const std::string& name) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
arg_names_ = {name};
return this;
}
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
arg_names_ = names;
return this;
}
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
CHECK_GE(start, 0);
CHECK_LE(start, limit);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK_LE(start, limit);
for (int64_t arg = start; arg <= limit; arg += step) {
args_.push_back({arg});
}
@@ -332,7 +325,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
}
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
args_.push_back(args);
return this;
}
@@ -342,28 +335,60 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
return this;
}
// Stores `setup` as this benchmark's setup callback by moving from the
// argument, and returns `this` for chaining. The callback must not compare
// equal to nullptr.
Benchmark* Benchmark::Setup(callback_function&& setup) {
  BM_CHECK(setup != nullptr);
  // `setup` is an ordinary rvalue reference, not a forwarding reference, so
  // std::move (rather than std::forward) is the idiomatic way to move from it.
  setup_ = std::move(setup);
  return this;
}
// Copying overload: stores a copy of `setup` as this benchmark's setup
// callback and returns `this` for chaining. The callback must not compare
// equal to nullptr.
Benchmark* Benchmark::Setup(const callback_function& setup) {
  BM_CHECK(setup != nullptr);
  this->setup_ = setup;
  return this;
}
// Stores `teardown` as this benchmark's teardown callback by moving from the
// argument, and returns `this` for chaining. The callback must not compare
// equal to nullptr.
Benchmark* Benchmark::Teardown(callback_function&& teardown) {
  BM_CHECK(teardown != nullptr);
  // `teardown` is an ordinary rvalue reference, not a forwarding reference,
  // so std::move (rather than std::forward) is the idiomatic way to move
  // from it.
  teardown_ = std::move(teardown);
  return this;
}
// Copying overload: stores a copy of `teardown` as this benchmark's teardown
// callback and returns `this` for chaining. The callback must not compare
// equal to nullptr.
Benchmark* Benchmark::Teardown(const callback_function& teardown) {
  BM_CHECK(teardown != nullptr);
  this->teardown_ = teardown;
  return this;
}
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
CHECK(multiplier > 1);
BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
return this;
}
Benchmark* Benchmark::MinTime(double t) {
CHECK(t > 0.0);
CHECK(iterations_ == 0);
BM_CHECK(t > 0.0);
BM_CHECK(iterations_ == 0);
min_time_ = t;
return this;
}
Benchmark* Benchmark::Iterations(size_t n) {
CHECK(n > 0);
CHECK(IsZero(min_time_));
// Records `t` as the minimum warm-up time and returns `this` for chaining.
// `t` must be non-negative, and no explicit iteration count may have been set
// (iterations_ must still be 0); Iterations() performs the matching check in
// the other direction.
Benchmark* Benchmark::MinWarmUpTime(double t) {
  BM_CHECK(t >= 0.0);
  BM_CHECK(iterations_ == 0);
  this->min_warmup_time_ = t;
  return this;
}
Benchmark* Benchmark::Iterations(IterationCount n) {
BM_CHECK(n > 0);
BM_CHECK(IsZero(min_time_));
BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
Benchmark* Benchmark::Repetitions(int n) {
CHECK(n > 0);
BM_CHECK(n > 0);
repetitions_ = n;
return this;
}
@@ -389,15 +414,21 @@ Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
return this;
}
// Turns on the measure_process_cpu_time_ flag and returns `this` for
// chaining. Unlike UseRealTime() and UseManualTime(), which exclude each
// other, this option may be combined with either of them.
Benchmark* Benchmark::MeasureProcessCPUTime() {
  this->measure_process_cpu_time_ = true;
  return this;
}
Benchmark* Benchmark::UseRealTime() {
CHECK(!use_manual_time_)
BM_CHECK(!use_manual_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true;
return this;
}
Benchmark* Benchmark::UseManualTime() {
CHECK(!use_real_time_)
BM_CHECK(!use_real_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
return this;
@@ -414,21 +445,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
return this;
}
Benchmark* Benchmark::ComputeStatistics(std::string name,
StatisticsFunc* statistics) {
statistics_.emplace_back(name, statistics);
Benchmark* Benchmark::ComputeStatistics(const std::string& name,
StatisticsFunc* statistics,
StatisticUnit unit) {
statistics_.emplace_back(name, statistics, unit);
return this;
}
Benchmark* Benchmark::Threads(int t) {
CHECK_GT(t, 0);
BM_CHECK_GT(t, 0);
thread_counts_.push_back(t);
return this;
}
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
AddRange(&thread_counts_, min_threads, max_threads, 2);
return this;
@@ -436,9 +468,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
int stride) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
CHECK_GE(stride, 1);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
BM_CHECK_GE(stride, 1);
for (auto i = min_threads; i < max_threads; i += stride) {
thread_counts_.push_back(i);
@@ -452,16 +484,36 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
void Benchmark::SetName(const char* name) { name_ = name; }
// Takes over `factory` by moving it into threadrunner_ and returns `this`
// for chaining.
Benchmark* Benchmark::ThreadRunner(threadrunner_factory&& factory) {
  this->threadrunner_ = std::move(factory);
  return this;
}
void Benchmark::SetName(const std::string& name) { name_ = name; }
const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
if (arg_names_.empty()) return -1;
if (arg_names_.empty()) {
return -1;
}
return static_cast<int>(arg_names_.size());
}
return static_cast<int>(args_.front().size());
}
// Returns the name registered for argument position `arg` as a C string.
// `arg` must satisfy 0 <= arg < arg_names_.size(); both bounds are checked.
// The pointer refers to storage owned by arg_names_.
const char* Benchmark::GetArgName(int arg) const {
  BM_CHECK_GE(arg, 0);
  // Convert only after the sign check so the comparison below is unsigned.
  const auto uarg = static_cast<size_t>(arg);
  BM_CHECK_LT(uarg, arg_names_.size());
  const std::string& arg_name = arg_names_[uarg];
  return arg_name.c_str();
}
// Returns the effective time unit: the current GetDefaultTimeUnit() value
// while no explicit unit has been chosen through Unit(), otherwise the
// explicitly stored time_unit_.
TimeUnit Benchmark::GetTimeUnit() const {
  if (use_default_time_unit_) {
    return GetDefaultTimeUnit();
  }
  return time_unit_;
}
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
@@ -474,4 +526,19 @@ void ClearRegisteredBenchmarks() {
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
}
// Convenience wrapper around internal::AddRange: returns a new vector holding
// exactly the values AddRange produces for the bounds [lo, hi] and the
// multiplier `multi`.
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
  std::vector<int64_t> range;
  internal::AddRange(&range, lo, hi, multi);
  return range;
}
// Returns the arithmetic sequence start, start + step, start + 2 * step, ...
// containing every term that does not exceed `limit`.
//
// Requires start <= limit and step >= 1. A non-positive step could never
// advance past `limit`, so it is rejected instead of looping forever.
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
  BM_CHECK_LE(start, limit);
  BM_CHECK_LE(1, step);
  std::vector<int64_t> args;
  // NOTE(review): explicit guard in case the checks above do not abort in
  // some build configurations -- confirm against check.h. An inverted range
  // then yields an empty result, exactly as the plain `arg <= limit` loop did.
  if (start > limit || step < 1) {
    return args;
  }
  const uint64_t stride = static_cast<uint64_t>(step);
  for (int64_t arg = start;; arg += step) {
    args.push_back(arg);
    // Distance still left to `limit`. Because arg <= limit holds here, the
    // unsigned subtraction is exact even when the bounds span the whole
    // int64_t range, and testing it *before* stepping keeps `arg += step`
    // from overflowing when `limit` is close to INT64_MAX.
    const uint64_t remaining =
        static_cast<uint64_t>(limit) - static_cast<uint64_t>(arg);
    if (remaining < stride) {
      break;
    }
  }
  return args;
}
} // end namespace benchmark

View File

@@ -1,33 +1,109 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
#include <algorithm>
#include <limits>
#include <vector>
#include "check.h"
namespace benchmark {
namespace internal {
// Appends the powers of 'mult' (1, mult, mult^2, ...) that lie in the closed
// interval [lo, hi] to *dst, in increasing order.
// Requires 0 <= lo <= hi and mult >= 2.
// Returns an iterator to the start of the inserted range (dst->end() when
// nothing was inserted).
template <typename T>
typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
                                            int mult) {
  BM_CHECK_GE(lo, 0);
  BM_CHECK_GE(hi, lo);
  BM_CHECK_GE(mult, 2);

  using difference_type = typename std::vector<T>::difference_type;

  // Remember the insertion point as an offset: push_back may reallocate, so
  // an iterator taken now could be invalidated by the loop below.
  const size_t start_offset = dst->size();

  constexpr T kmax = std::numeric_limits<T>::max();

  // Space out the values in multiples of "mult"
  for (T i = static_cast<T>(1); i <= hi; i = static_cast<T>(i * mult)) {
    if (i >= lo) {
      dst->push_back(i);
    }
    // Break the loop here since multiplying by
    // 'mult' would move outside of the range of T
    if (i > kmax / mult) break;
  }

  // Convert through the vector's own difference_type; casting to 'int' would
  // truncate the offset for vectors with more than INT_MAX elements.
  return dst->begin() + static_cast<difference_type>(start_offset);
}
// Appends the negated powers of 'mult' that lie in the closed interval
// [lo, hi] to *dst, in increasing order (most negative first).
// Requires lo <= hi <= 0, and neither bound may be the minimum value of T
// because both bounds are negated below.
template <typename T>
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
  BM_CHECK_GT(lo, std::numeric_limits<T>::min());
  BM_CHECK_GT(hi, std::numeric_limits<T>::min());
  BM_CHECK_GE(hi, lo);
  BM_CHECK_LE(hi, 0);

  // Mirror the interval onto the non-negative side. The casts are needed
  // because small integer types are promoted to 'int' by unary minus.
  const T smallest_magnitude = static_cast<T>(-hi);
  const T largest_magnitude = static_cast<T>(-lo);

  // Generate the positive powers, then flip the sign of each new element and
  // reverse the new tail so that it ends up in increasing order.
  const auto first_new =
      AddPowers(dst, smallest_magnitude, largest_magnitude, mult);
  for (auto cur = first_new; cur != dst->end(); ++cur) {
    *cur = static_cast<T>(*cur * -1);
  }
  std::reverse(first_new, dst->end());
}
template <typename T>
void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
CHECK_GE(lo, 0);
CHECK_GE(hi, lo);
CHECK_GE(mult, 2);
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
"Args type must be a signed integer");
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
// Add "lo"
dst->push_back(lo);
static const T kmax = std::numeric_limits<T>::max();
// Handle lo == hi as a special case, so we then know
// lo < hi and so it is safe to add 1 to lo and subtract 1
// from hi without falling outside of the range of T.
if (lo == hi) return;
// Now space out the benchmarks in multiples of "mult"
for (T i = 1; i < kmax / mult; i *= mult) {
if (i >= hi) break;
if (i > lo) {
dst->push_back(i);
}
// Ensure that lo_inner <= hi_inner below.
if (lo + 1 == hi) {
dst->push_back(hi);
return;
}
// Add "hi" (if different from "lo")
if (hi != lo) {
// Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive).
const auto lo_inner = static_cast<T>(lo + 1);
const auto hi_inner = static_cast<T>(hi - 1);
// Insert negative values
if (lo_inner < 0) {
AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult);
}
// Treat 0 as a special case (see discussion on #762).
if (lo < 0 && hi >= 0) {
dst->push_back(0);
}
// Insert positive values
if (hi_inner > 0) {
AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult);
}
// Add "hi" (if different from last value).
if (hi != dst->back()) {
dst->push_back(hi);
}
}
} // namespace internal
} // namespace benchmark
#endif // BENCHMARK_REGISTER_H

Some files were not shown because too many files have changed in this diff Show More