update google benchmark library to 1.9.4 (#9441)

* benchmark: update README and add update script

* update google benchmark library to 1.9.4

* update tnt CMakeLists to match the library new version
This commit is contained in:
Mathias Agopian
2025-11-19 11:50:34 -08:00
committed by GitHub
parent 3127632f96
commit 311104da97
180 changed files with 16394 additions and 4747 deletions

View File

@@ -1,5 +1,5 @@
---
Language: Cpp
BasedOnStyle: Google
PointerAlignment: Left
...

37
third_party/benchmark/.clang-tidy vendored Normal file
View File

@@ -0,0 +1,37 @@
---
Checks: >
abseil-*,
bugprone-*,
clang-analyzer-*,
cppcoreguidelines-*,
google-*,
misc-*,
performance-*,
readability-*,
-clang-analyzer-deadcode*,
-clang-analyzer-optin*,
-readability-identifier-length
WarningsAsErrors: ''
HeaderFilterRegex: ''
FormatStyle: none
CheckOptions:
llvm-else-after-return.WarnOnConditionVariables: 'false'
modernize-loop-convert.MinConfidence: reasonable
modernize-replace-auto-ptr.IncludeStyle: llvm
cert-str34-c.DiagnoseSignedUnsignedCharComparisons: 'false'
google-readability-namespace-comments.ShortNamespaceLines: '10'
cert-err33-c.CheckedFunctions: '::aligned_alloc;::asctime_s;::at_quick_exit;::atexit;::bsearch;::bsearch_s;::btowc;::c16rtomb;::c32rtomb;::calloc;::clock;::cnd_broadcast;::cnd_init;::cnd_signal;::cnd_timedwait;::cnd_wait;::ctime_s;::fclose;::fflush;::fgetc;::fgetpos;::fgets;::fgetwc;::fopen;::fopen_s;::fprintf;::fprintf_s;::fputc;::fputs;::fputwc;::fputws;::fread;::freopen;::freopen_s;::fscanf;::fscanf_s;::fseek;::fsetpos;::ftell;::fwprintf;::fwprintf_s;::fwrite;::fwscanf;::fwscanf_s;::getc;::getchar;::getenv;::getenv_s;::gets_s;::getwc;::getwchar;::gmtime;::gmtime_s;::localtime;::localtime_s;::malloc;::mbrtoc16;::mbrtoc32;::mbsrtowcs;::mbsrtowcs_s;::mbstowcs;::mbstowcs_s;::memchr;::mktime;::mtx_init;::mtx_lock;::mtx_timedlock;::mtx_trylock;::mtx_unlock;::printf_s;::putc;::putwc;::raise;::realloc;::remove;::rename;::scanf;::scanf_s;::setlocale;::setvbuf;::signal;::snprintf;::snprintf_s;::sprintf;::sprintf_s;::sscanf;::sscanf_s;::strchr;::strerror_s;::strftime;::strpbrk;::strrchr;::strstr;::strtod;::strtof;::strtoimax;::strtok;::strtok_s;::strtol;::strtold;::strtoll;::strtoul;::strtoull;::strtoumax;::strxfrm;::swprintf;::swprintf_s;::swscanf;::swscanf_s;::thrd_create;::thrd_detach;::thrd_join;::thrd_sleep;::time;::timespec_get;::tmpfile;::tmpfile_s;::tmpnam;::tmpnam_s;::tss_create;::tss_get;::tss_set;::ungetc;::ungetwc;::vfprintf;::vfprintf_s;::vfscanf;::vfscanf_s;::vfwprintf;::vfwprintf_s;::vfwscanf;::vfwscanf_s;::vprintf_s;::vscanf;::vscanf_s;::vsnprintf;::vsnprintf_s;::vsprintf;::vsprintf_s;::vsscanf;::vsscanf_s;::vswprintf;::vswprintf_s;::vswscanf;::vswscanf_s;::vwprintf_s;::vwscanf;::vwscanf_s;::wcrtomb;::wcschr;::wcsftime;::wcspbrk;::wcsrchr;::wcsrtombs;::wcsrtombs_s;::wcsstr;::wcstod;::wcstof;::wcstoimax;::wcstok;::wcstok_s;::wcstol;::wcstold;::wcstoll;::wcstombs;::wcstombs_s;::wcstoul;::wcstoull;::wcstoumax;::wcsxfrm;::wctob;::wctrans;::wctype;::wmemchr;::wprintf_s;::wscanf;::wscanf_s;'
cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField: 'false'
cert-dcl16-c.NewSuffixes: 'L;LL;LU;LLU'
google-readability-braces-around-statements.ShortStatementLines: '1'
cppcoreguidelines-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: 'true'
google-readability-namespace-comments.SpacesBeforeComments: '2'
modernize-loop-convert.MaxCopySize: '16'
modernize-pass-by-value.IncludeStyle: llvm
modernize-use-nullptr.NullMacros: 'NULL'
llvm-qualified-auto.AddConstToQualified: 'false'
modernize-loop-convert.NamingStyle: CamelCase
llvm-else-after-return.WarnOnUnfixable: 'false'
google-readability-function-size.StatementThreshold: '800'
...

View File

@@ -0,0 +1 @@
.*third_party/.*

View File

@@ -0,0 +1,32 @@
---
name: Bug report
about: Create a report to help us improve
title: "[BUG]"
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**System**
Which OS, compiler, and compiler version are you using:
- OS:
- Compiler and version:
**To reproduce**
Steps to reproduce the behavior:
1. sync to commit ...
2. cmake/bazel...
3. make ...
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Additional context**
Add any other context about the problem here.

View File

@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: "[FR]"
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

View File

@@ -0,0 +1,12 @@
if ! bazel version; then
arch=$(uname -m)
if [ "$arch" == "aarch64" ]; then
arch="arm64"
fi
echo "Downloading $arch Bazel binary from GitHub releases."
curl -L -o $HOME/bin/bazel --create-dirs "https://github.com/bazelbuild/bazel/releases/download/8.2.0/bazel-8.2.0-linux-$arch"
chmod +x $HOME/bin/bazel
else
# Bazel is installed for the correct architecture
exit 0
fi

35
third_party/benchmark/.github/libcxx-setup.sh vendored Executable file
View File

@@ -0,0 +1,35 @@
#!/usr/bin/env bash
set -e
# Checkout LLVM sources
git clone --filter=blob:none --depth=1 --branch llvmorg-19.1.6 --no-checkout https://github.com/llvm/llvm-project.git llvm-project
cd llvm-project
git sparse-checkout set --cone
git checkout llvmorg-19.1.6
git sparse-checkout set cmake llvm/cmake runtimes libcxx libcxxabi
cd ..
## Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
## Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -GNinja \
-DCMAKE_C_COMPILER=${CC} \
-DCMAKE_CXX_COMPILER=${CXX} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo \
-DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=OFF \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
-DLIBCXXABI_USE_LLVM_UNWINDER=OFF \
-DLLVM_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_TESTS=OFF \
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
-DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi' \
../llvm-project/runtimes/
cmake --build . -- cxx cxxabi
cd ..

View File

@@ -0,0 +1,37 @@
name: bazel
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
build_and_test_default:
name: bazel.${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/checkout@v4
- name: mount bazel cache
uses: actions/cache@v4
env:
cache-name: bazel-cache
with:
path: "~/.cache/bazel"
key: ${{ env.cache-name }}-${{ matrix.os }}-${{ github.ref }}
restore-keys: |
${{ env.cache-name }}-${{ matrix.os }}-main
- name: build
run: |
bazel build //:benchmark //:benchmark_main //test/...
- name: test
run: |
bazel test --test_output=all //test/...

View File

@@ -0,0 +1,49 @@
name: build-and-test-min-cmake
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: ${{ matrix.os }}.min-cmake
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- uses: lukka/get-cmake@latest
with:
cmakeVersion: 3.13.0
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: setup cmake initial cache
run: touch compiler-cache.cmake
- name: configure cmake
env:
CXX: ${{ matrix.compiler }}
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake -C ${{ github.workspace }}/compiler-cache.cmake
$GITHUB_WORKSPACE
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_CXX_VISIBILITY_PRESET=hidden
-DCMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build .

View File

@@ -0,0 +1,54 @@
name: build-and-test-perfcounters
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
# TODO(dominic): Extend this to include compiler and set through env: CC/CXX.
name: ${{ matrix.os }}.${{ matrix.build_type }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
build_type: ['Release', 'Debug']
steps:
- uses: actions/checkout@v4
- name: install libpfm
run: |
sudo apt update
sudo apt -y install libpfm4-dev
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_LIBPFM=1
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
# Skip testing, for now. It seems perf_event_open does not succeed on the
# hosting machine, very likely a permissions issue.
# TODO(mtrofin): Enable test.
# - name: test
# shell: bash
# working-directory: ${{ runner.workspace }}/_build
# run: ctest -C ${{ matrix.build_type }} --rerun-failed --output-on-failure

View File

@@ -0,0 +1,151 @@
name: build-and-test
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
# TODO: add 32-bit builds (g++ and clang++) for ubuntu
# (requires g++-multilib and libc6:i386)
# TODO: add coverage build (requires lcov)
# TODO: add clang + libc++ builds for ubuntu
job:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.compiler }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-24.04, ubuntu-22.04, ubuntu-24.04-arm, macos-latest]
build_type: ['Release', 'Debug']
compiler: ['g++', 'clang++']
lib: ['shared', 'static']
steps:
- name: Install dependencies (macos)
if: runner.os == 'macOS'
run: brew install ninja
- uses: actions/checkout@v4
- name: build
uses: threeal/cmake-action@v2.1.0
with:
build-dir: ${{ runner.workspace }}/_build
cxx-compiler: ${{ matrix.compiler }}
options: |
BENCHMARK_DOWNLOAD_DEPENDENCIES=ON
BUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
CMAKE_BUILD_TYPE=${{ matrix.build_type }}
CMAKE_CXX_COMPILER=${{ matrix.compiler }}
CMAKE_CXX_VISIBILITY_PRESET=hidden
CMAKE_VISIBILITY_INLINES_HIDDEN=ON
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV
msvc:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msvc }}
runs-on: ${{ matrix.os }}
defaults:
run:
shell: powershell
strategy:
fail-fast: false
matrix:
msvc:
- VS-16-2019
- VS-17-2022
build_type:
- Debug
- Release
lib:
- shared
- static
include:
- msvc: VS-16-2019
os: windows-2019
generator: 'Visual Studio 16 2019'
- msvc: VS-17-2022
os: windows-2022
generator: 'Visual Studio 17 2022'
steps:
- uses: actions/checkout@v4
- uses: lukka/get-cmake@latest
- name: configure cmake
run: >
cmake -S . -B ${{ runner.workspace }}/_build/
-G "${{ matrix.generator }}"
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build ${{ runner.workspace }}/_build/ --config ${{ matrix.build_type }}
- name: test
run: ctest --test-dir ${{ runner.workspace }}/_build/ -C ${{ matrix.build_type }} -VV
msys2:
name: ${{ matrix.os }}.${{ matrix.build_type }}.${{ matrix.lib }}.${{ matrix.msys2.msystem }}
runs-on: ${{ matrix.os }}
defaults:
run:
shell: msys2 {0}
strategy:
fail-fast: false
matrix:
os: [ windows-latest ]
msys2:
- { msystem: MINGW64, arch: x86_64, family: GNU, compiler: g++ }
- { msystem: CLANG64, arch: x86_64, family: LLVM, compiler: clang++ }
- { msystem: UCRT64, arch: x86_64, family: GNU, compiler: g++ }
build_type:
- Debug
- Release
lib:
- shared
- static
steps:
- name: setup msys2
uses: msys2/setup-msys2@v2
with:
cache: false
msystem: ${{ matrix.msys2.msystem }}
update: true
install: >-
git
base-devel
pacboy: >-
gcc:p
clang:p
cmake:p
ninja:p
- uses: actions/checkout@v4
# NOTE: we can't use cmake actions here as we need to do everything in msys2 shell.
- name: configure cmake
env:
CXX: ${{ matrix.msys2.compiler }}
run: >
cmake -S . -B _build/
-GNinja
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DBUILD_SHARED_LIBS=${{ matrix.lib == 'shared' }}
- name: build
run: cmake --build _build/ --config ${{ matrix.build_type }}
- name: test
working-directory: _build
run: ctest -C ${{ matrix.build_type }} -VV

View File

@@ -0,0 +1,19 @@
name: clang-format-lint
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: check-clang-format
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: DoozyX/clang-format-lint-action@v0.18.2
with:
source: './include/benchmark ./src ./test'
clangFormatVersion: 18

View File

@@ -0,0 +1,41 @@
name: clang-tidy
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
jobs:
job:
name: run-clang-tidy
runs-on: ubuntu-latest
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
- name: install clang-tidy
run: sudo apt update && sudo apt -y install clang-tidy
- name: create build environment
run: cmake -E make_directory ${{ github.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ github.workspace }}/_build
run: >
cmake $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=clang
-DCMAKE_CXX_COMPILER=clang++
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DGTEST_COMPILE_COMMANDS=OFF
- name: run
shell: bash
working-directory: ${{ github.workspace }}/_build
run: run-clang-tidy -config-file=$GITHUB_WORKSPACE/.clang-tidy

View File

@@ -0,0 +1,31 @@
name: doxygen
on:
push:
branches: [main]
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
build-and-deploy:
name: Build HTML documentation
runs-on: ubuntu-latest
steps:
- name: Fetching sources
uses: actions/checkout@v4
- name: Installing build dependencies
run: |
sudo apt update
sudo apt install doxygen gcc git
- name: Creating build directory
run: mkdir build
- name: Building HTML documentation with Doxygen
run: |
cmake -S . -B build -DBENCHMARK_ENABLE_TESTING:BOOL=OFF -DBENCHMARK_ENABLE_DOXYGEN:BOOL=ON -DBENCHMARK_INSTALL_DOCS:BOOL=ON
cmake --build build --target benchmark_doxygen

View File

@@ -0,0 +1,41 @@
name: python + Bazel pre-commit checks
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CMAKE_GENERATOR: Ninja
jobs:
pre-commit:
runs-on: ubuntu-latest
env:
MYPY_CACHE_DIR: "${{ github.workspace }}/.cache/mypy"
RUFF_CACHE_DIR: "${{ github.workspace }}/.cache/ruff"
PRE_COMMIT_HOME: "${{ github.workspace }}/.cache/pre-commit"
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.11
cache: pip
cache-dependency-path: pyproject.toml
- name: Install dependencies
run: python -m pip install ".[dev]"
- name: Cache pre-commit tools
uses: actions/cache@v4
with:
path: |
${{ env.MYPY_CACHE_DIR }}
${{ env.RUFF_CACHE_DIR }}
${{ env.PRE_COMMIT_HOME }}
key: ${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}-linter-cache
- name: Run pre-commit checks
run: pre-commit run --all-files --verbose --show-diff-on-failure

View File

@@ -0,0 +1,97 @@
name: sanitizer
on:
push: {}
pull_request: {}
env:
CMAKE_GENERATOR: Ninja
UBSAN_OPTIONS: "print_stacktrace=1"
jobs:
job:
name: ${{ matrix.sanitizer }}.${{ matrix.build_type }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
build_type: ['Debug', 'RelWithDebInfo']
sanitizer: ['asan', 'ubsan', 'tsan', 'msan']
steps:
- uses: actions/checkout@v4
- name: configure msan env
if: matrix.sanitizer == 'msan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=MemoryWithOrigins" >> $GITHUB_ENV
- name: configure ubsan env
if: matrix.sanitizer == 'ubsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Undefined" >> $GITHUB_ENV
- name: configure asan env
if: matrix.sanitizer == 'asan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=address -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Address" >> $GITHUB_ENV
- name: configure tsan env
if: matrix.sanitizer == 'tsan'
run: |
echo "EXTRA_FLAGS=-g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all" >> $GITHUB_ENV
echo "LIBCXX_SANITIZER=Thread" >> $GITHUB_ENV
- name: fine-tune asan options
# in asan we get an error from std::regex. ignore it.
if: matrix.sanitizer == 'asan'
run: |
echo "ASAN_OPTIONS=alloc_dealloc_mismatch=0" >> $GITHUB_ENV
- name: setup clang
uses: egor-tensin/setup-clang@v1
with:
version: latest
platform: x64
- name: configure clang
run: |
echo "CC=cc" >> $GITHUB_ENV
echo "CXX=c++" >> $GITHUB_ENV
- name: build libc++ (non-asan)
if: matrix.sanitizer != 'asan'
run: |
"${GITHUB_WORKSPACE}/.github/libcxx-setup.sh"
echo "EXTRA_CXX_FLAGS=-stdlib=libc++ -L${GITHUB_WORKSPACE}/llvm-build/lib -lc++abi -I${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Isystem${GITHUB_WORKSPACE}/llvm-build/include/c++/v1 -Wl,-rpath,${GITHUB_WORKSPACE}/llvm-build/lib" >> $GITHUB_ENV
- name: create build environment
run: cmake -E make_directory ${{ runner.workspace }}/_build
- name: configure cmake
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: >
VERBOSE=1
cmake -GNinja $GITHUB_WORKSPACE
-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF
-DBENCHMARK_ENABLE_LIBPFM=OFF
-DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
-DCMAKE_C_COMPILER=${{ env.CC }}
-DCMAKE_CXX_COMPILER=${{ env.CXX }}
-DCMAKE_C_FLAGS="${{ env.EXTRA_FLAGS }}"
-DCMAKE_CXX_FLAGS="${{ env.EXTRA_FLAGS }} ${{ env.EXTRA_CXX_FLAGS }}"
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
- name: build
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: cmake --build . --config ${{ matrix.build_type }}
- name: test
shell: bash
working-directory: ${{ runner.workspace }}/_build
run: ctest -C ${{ matrix.build_type }} -VV

View File

@@ -0,0 +1,33 @@
name: test-bindings
on:
push:
branches: [main]
pull_request:
branches: [main]
env:
CMAKE_GENERATOR: Ninja
jobs:
python_bindings:
name: Test GBM Python ${{ matrix.python-version }} bindings on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macos-latest, windows-latest ]
python-version: [ "3.10", "3.11", "3.12", "3.13" ]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install GBM Python bindings on ${{ matrix.os }}
run: python -m pip install .
- name: Run example on ${{ matrix.os }} under Python ${{ matrix.python-version }}
run: python bindings/python/google_benchmark/example.py

View File

@@ -0,0 +1,83 @@
name: Build and upload Python wheels
on:
workflow_dispatch:
release:
types:
- published
env:
CMAKE_GENERATOR: Ninja
jobs:
build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install Python 3.12
uses: actions/setup-python@v5
with:
python-version: "3.12"
- run: python -m pip install build
- name: Build sdist
run: python -m build --sdist
- uses: actions/upload-artifact@v4
with:
name: dist-sdist
path: dist/*.tar.gz
build_wheels:
name: Build Google Benchmark wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, ubuntu-24.04-arm, macos-13, macos-14, windows-latest]
steps:
- name: Check out Google Benchmark
uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
name: Install Python 3.12
with:
python-version: "3.12"
- run: pip install --upgrade pip uv
- name: Build wheels on ${{ matrix.os }} using cibuildwheel
uses: pypa/cibuildwheel@v2.23.2
env:
CIBW_BUILD: "cp310-* cp311-* cp312-*"
CIBW_BUILD_FRONTEND: "build[uv]"
CIBW_SKIP: "*-musllinux_*"
CIBW_ARCHS: auto64
CIBW_BEFORE_ALL_LINUX: bash .github/install_bazel.sh
# Grab the rootless Bazel installation inside the container.
CIBW_ENVIRONMENT_LINUX: PATH=$PATH:$HOME/bin
CIBW_TEST_COMMAND: python {project}/bindings/python/google_benchmark/example.py
# unused by Bazel, but needed explicitly by delocate on MacOS.
MACOSX_DEPLOYMENT_TARGET: "10.14"
- name: Upload Google Benchmark ${{ matrix.os }} wheels
uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}
path: wheelhouse/*.whl
pypi_upload:
name: Publish google-benchmark wheels to PyPI
needs: [build_sdist, build_wheels]
runs-on: ubuntu-latest
permissions:
id-token: write
steps:
- uses: actions/download-artifact@v4
with:
path: dist
pattern: dist-*
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1

View File

@@ -8,8 +8,10 @@
!/cmake/*.cmake
!/test/AssemblyTests.cmake
*~
*.swp
*.pyc
__pycache__
.DS_Store
# lcov
*.lcov
@@ -44,6 +46,7 @@ rules.ninja
# bazel output symlinks.
bazel-*
MODULE.bazel.lock
# out-of-source build top-level folders.
build/
@@ -56,3 +59,10 @@ build*/
# Visual Studio 2015/2017 cache/options directory
.vs/
CMakeSettings.json
# Visual Studio Code cache/options directory
.vscode/
# Python build stuff
dist/
*.egg-info*

View File

@@ -0,0 +1,18 @@
repos:
- repo: https://github.com/keith/pre-commit-buildifier
rev: 8.0.3
hooks:
- id: buildifier
- id: buildifier-lint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
hooks:
- id: mypy
types_or: [ python, pyi ]
args: [ "--ignore-missing-imports", "--scripts-are-modules" ]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.8
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- id: ruff-format

View File

@@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Install a newer CMake version
curl -sSL https://cmake.org/files/v3.6/cmake-3.6.1-Linux-x86_64.sh -o install-cmake.sh
chmod +x install-cmake.sh
sudo ./install-cmake.sh --prefix=/usr/local --skip-license
# Checkout LLVM sources
git clone --depth=1 https://github.com/llvm-mirror/llvm.git llvm-source
git clone --depth=1 https://github.com/llvm-mirror/libcxx.git llvm-source/projects/libcxx
git clone --depth=1 https://github.com/llvm-mirror/libcxxabi.git llvm-source/projects/libcxxabi
# Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi
# Build and install libc++ (Use unstable ABI for better sanitizer coverage)
mkdir llvm-build && cd llvm-build
cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} \
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr \
-DLIBCXX_ABI_UNSTABLE=ON \
-DLLVM_USE_SANITIZER=${LIBCXX_SANITIZER} \
-DLLVM_BUILD_32_BITS=${BUILD_32_BITS} \
../llvm-source
make cxx -j2
sudo make install-cxxabi install-cxx
cd ../

View File

@@ -1,199 +0,0 @@
sudo: required
dist: trusty
language: cpp
env:
global:
- /usr/local/bin:$PATH
matrix:
include:
- compiler: gcc
addons:
apt:
packages:
- lcov
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Coverage
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug
- compiler: gcc
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release
- compiler: gcc
addons:
apt:
packages:
- g++-multilib
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Debug BUILD_32_BITS=ON
- compiler: gcc
addons:
apt:
packages:
- g++-multilib
env: COMPILER=g++ C_COMPILER=gcc BUILD_TYPE=Release BUILD_32_BITS=ON
- compiler: gcc
env:
- INSTALL_GCC6_FROM_PPA=1
- COMPILER=g++-6 C_COMPILER=gcc-6 BUILD_TYPE=Debug
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-fno-omit-frame-pointer -g -O2 -fsanitize=undefined,address -fuse-ld=gold"
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Debug
- compiler: clang
env: COMPILER=clang++ C_COMPILER=clang BUILD_TYPE=Release
# Clang w/ libc++
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1
- EXTRA_FLAGS="-stdlib=libc++"
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release
- LIBCXX_BUILD=1
- EXTRA_FLAGS="-stdlib=libc++"
# Clang w/ 32bit libc++
- compiler: clang
addons:
apt:
packages:
- clang-3.8
- g++-multilib
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-stdlib=libc++ -m32"
# Clang w/ 32bit libc++
- compiler: clang
addons:
apt:
packages:
- clang-3.8
- g++-multilib
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Release
- LIBCXX_BUILD=1
- BUILD_32_BITS=ON
- EXTRA_FLAGS="-stdlib=libc++ -m32"
# Clang w/ libc++, ASAN, UBSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER="Undefined;Address"
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=undefined,address -fno-sanitize-recover=all"
- UBSAN_OPTIONS=print_stacktrace=1
# Clang w/ libc++ and MSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=Debug
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=MemoryWithOrigins
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=memory -fsanitize-memory-track-origins"
# Clang w/ libc++ and MSAN
- compiler: clang
addons:
apt:
packages:
clang-3.8
env:
- COMPILER=clang++-3.8 C_COMPILER=clang-3.8 BUILD_TYPE=RelWithDebInfo
- LIBCXX_BUILD=1 LIBCXX_SANITIZER=Thread
- ENABLE_SANITIZER=1
- EXTRA_FLAGS="-stdlib=libc++ -g -O2 -fno-omit-frame-pointer -fsanitize=thread -fno-sanitize-recover=all"
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Debug
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Release
- os: osx
osx_image: xcode8.3
compiler: clang
env:
- COMPILER=clang++ BUILD_TYPE=Release BUILD_32_BITS=ON
- os: osx
osx_image: xcode8.3
compiler: gcc
env:
- COMPILER=g++-7 C_COMPILER=gcc-7 BUILD_TYPE=Debug
before_script:
- if [ -n "${LIBCXX_BUILD}" ]; then
source .travis-libcxx-setup.sh;
fi
- if [ -n "${ENABLE_SANITIZER}" ]; then
export EXTRA_OPTIONS="-DBENCHMARK_ENABLE_ASSEMBLY_TESTS=OFF";
else
export EXTRA_OPTIONS="";
fi
- mkdir -p build && cd build
before_install:
- if [ -z "$BUILD_32_BITS" ]; then
export BUILD_32_BITS=OFF && echo disabling 32 bit build;
fi
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
sudo add-apt-repository -y "ppa:ubuntu-toolchain-r/test";
sudo apt-get update --option Acquire::Retries=100 --option Acquire::http::Timeout="60";
fi
install:
- if [ -n "${INSTALL_GCC6_FROM_PPA}" ]; then
travis_wait sudo -E apt-get -yq --no-install-suggests --no-install-recommends install g++-6;
fi
- if [ "${TRAVIS_OS_NAME}" == "linux" -a "${BUILD_32_BITS}" == "OFF" ]; then
travis_wait sudo -E apt-get -y --no-install-suggests --no-install-recommends install llvm-3.9-tools;
sudo cp /usr/lib/llvm-3.9/bin/FileCheck /usr/local/bin/;
fi
- if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
PATH=~/.local/bin:${PATH};
pip install --user --upgrade pip;
travis_wait pip install --user cpp-coveralls;
fi
- if [ "${C_COMPILER}" == "gcc-7" -a "${TRAVIS_OS_NAME}" == "osx" ]; then
rm -f /usr/local/include/c++;
brew update;
travis_wait brew install gcc@7;
fi
- if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
sudo apt-get update -qq;
sudo apt-get install -qq unzip;
wget https://github.com/bazelbuild/bazel/releases/download/0.10.1/bazel-0.10.1-installer-linux-x86_64.sh --output-document bazel-installer.sh;
travis_wait sudo bash bazel-installer.sh;
fi
- if [ "${TRAVIS_OS_NAME}" == "osx" ]; then
curl -L -o bazel-installer.sh https://github.com/bazelbuild/bazel/releases/download/0.10.1/bazel-0.10.1-installer-darwin-x86_64.sh;
travis_wait sudo bash bazel-installer.sh;
fi
script:
- cmake -DCMAKE_C_COMPILER=${C_COMPILER} -DCMAKE_CXX_COMPILER=${COMPILER} -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_CXX_FLAGS="${EXTRA_FLAGS}" -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -DBENCHMARK_BUILD_32_BITS=${BUILD_32_BITS} ${EXTRA_OPTIONS} ..
- make
- ctest -C ${BUILD_TYPE} --output-on-failure
- bazel test -c dbg --define google_benchmark.have_regex=posix --announce_rc --verbose_failures --test_output=errors --keep_going //test/...
after_success:
- if [ "${BUILD_TYPE}" == "Coverage" -a "${TRAVIS_OS_NAME}" == "linux" ]; then
coveralls --include src --include include --gcov-options '\-lp' --root .. --build-root .;
fi

View File

@@ -1,25 +1,30 @@
import os
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
flags = [
'-Wall',
'-Werror',
'-pedantic-errors',
'-std=c++0x',
'-fno-strict-aliasing',
'-O3',
'-DNDEBUG',
# ...and the same thing goes for the magic -x option which specifies the
# language that the files to be compiled are written in. This is mostly
# relevant for c++ headers.
# For a C project, you would set this to 'c' instead of 'c++'.
'-x', 'c++',
'-I', 'include',
'-isystem', '/usr/include',
'-isystem', '/usr/local/include',
"-Wall",
"-Werror",
"-pedantic-errors",
"-std=c++0x",
"-fno-strict-aliasing",
"-O3",
"-DNDEBUG",
# ...and the same thing goes for the magic -x option which specifies the
# language that the files to be compiled are written in. This is mostly
# relevant for c++ headers.
# For a C project, you would set this to 'c' instead of 'c++'.
"-x",
"c++",
"-I",
"include",
"-isystem",
"/usr/include",
"-isystem",
"/usr/local/include",
]
@@ -29,87 +34,87 @@ flags = [
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags. Notice that YCM itself uses that approach.
compilation_database_folder = ''
compilation_database_folder = ""
if os.path.exists( compilation_database_folder ):
database = ycm_core.CompilationDatabase( compilation_database_folder )
if os.path.exists(compilation_database_folder):
database = ycm_core.CompilationDatabase(compilation_database_folder)
else:
database = None
database = None
SOURCE_EXTENSIONS = [".cc"]
SOURCE_EXTENSIONS = [ '.cc' ]
def DirectoryOfThisScript():
return os.path.dirname( os.path.abspath( __file__ ) )
return os.path.dirname(os.path.abspath(__file__))
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
if not working_directory:
return list( flags )
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
def MakeRelativePathsInFlagsAbsolute(flags, working_directory):
if not working_directory:
return list(flags)
new_flags = []
make_next_absolute = False
path_flags = ["-isystem", "-I", "-iquote", "--sysroot="]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
new_flag = os.path.join( working_directory, flag )
if make_next_absolute:
make_next_absolute = False
if not flag.startswith("/"):
new_flag = os.path.join(working_directory, flag)
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith( path_flag ):
path = flag[ len( path_flag ): ]
new_flag = path_flag + os.path.join( working_directory, path )
break
if flag.startswith(path_flag):
path = flag[len(path_flag) :]
new_flag = path_flag + os.path.join(working_directory, path)
break
if new_flag:
new_flags.append( new_flag )
return new_flags
if new_flag:
new_flags.append(new_flag)
return new_flags
def IsHeaderFile( filename ):
extension = os.path.splitext( filename )[ 1 ]
return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
def IsHeaderFile(filename):
extension = os.path.splitext(filename)[1]
return extension in [".h", ".hxx", ".hpp", ".hh"]
def GetCompilationInfoForFile( filename ):
# The compilation_commands.json file generated by CMake does not have entries
# for header files. So we do our best by asking the db for flags for a
# corresponding source file, if any. If one exists, the flags for that file
# should be good enough.
if IsHeaderFile( filename ):
basename = os.path.splitext( filename )[ 0 ]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists( replacement_file ):
compilation_info = database.GetCompilationInfoForFile(
replacement_file )
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile( filename )
def GetCompilationInfoForFile(filename):
# The compilation_commands.json file generated by CMake does not have
# entries for header files. So we do our best by asking the db for flags for
# a corresponding source file, if any. If one exists, the flags for that
# file should be good enough.
if IsHeaderFile(filename):
basename = os.path.splitext(filename)[0]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists(replacement_file):
compilation_info = database.GetCompilationInfoForFile(
replacement_file
)
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile(filename)
def FlagsForFile( filename, **kwargs ):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile( filename )
if not compilation_info:
return None
def FlagsForFile(filename, **kwargs):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile(filename)
if not compilation_info:
return None
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_ )
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_,
)
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute(flags, relative_to)
return {
'flags': final_flags,
'do_cache': True
}
return {"flags": final_flags, "do_cache": True}

View File

@@ -9,40 +9,64 @@
# Please keep the list sorted.
Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steeleal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Carto
Cezary Skrzyński <czars1988@gmail.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Daniel Harvey <danielharvey458@gmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dirac Research
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Gergely Meszaros <maetveis@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Google Inc.
Henrique Bucher <hbucher@gmail.com>
International Business Machines Corporation
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
MongoDB Inc.
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Paul Redmond <paul.redmond@gmail.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shapr3D <google-contributors@shapr3d.com>
Shuo Chen <chenshuo@chenshuo.com>
Staffan Tjernstrom <staffantj@gmail.com>
Steinar H. Gunderson <sgunderson@bigfoot.com>
Stripe, Inc.
Tobias Schmidt <tobias.schmidt@in.tum.de>
Yixuan Qiu <yixuanq@gmail.com>
Yusuke Suzuki <utatane.tea@gmail.com>
Zbigniew Skowron <zbychs@gmail.com>

View File

@@ -1,9 +1,38 @@
load("@rules_cc//cc:defs.bzl", "cc_library")
licenses(["notice"])
COPTS = [
"-pedantic",
"-pedantic-errors",
"-std=c++17",
"-Wall",
"-Wconversion",
"-Wextra",
"-Wshadow",
# "-Wshorten-64-to-32",
"-Wfloat-equal",
"-fstrict-aliasing",
## assert() are used a lot in tests upstream, which may be optimised out leading to
## unused-variable warning.
"-Wno-unused-variable",
"-Werror=old-style-cast",
]
MSVC_COPTS = [
"/std:c++17",
]
config_setting(
name = "windows",
values = {
"cpu": "x64_windows",
constraint_values = ["@platforms//os:windows"],
visibility = [":__subpackages__"],
)
config_setting(
name = "perfcounters",
define_values = {
"pfm": "1",
},
visibility = [":__subpackages__"],
)
@@ -17,20 +46,51 @@ cc_library(
],
exclude = ["src/benchmark_main.cc"],
),
hdrs = ["include/benchmark/benchmark.h"],
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
copts = select({
":windows": MSVC_COPTS,
"//conditions:default": COPTS,
}),
defines = [
"BENCHMARK_STATIC_DEFINE",
"BENCHMARK_VERSION=\\\"" + (module_version() if module_version() != None else "") + "\\\"",
] + select({
":perfcounters": ["HAVE_LIBPFM"],
"//conditions:default": [],
}),
includes = ["include"],
linkopts = select({
":windows": ["-DEFAULTLIB:shlwapi.lib"],
"//conditions:default": ["-pthread"],
}),
strip_include_prefix = "include",
# Only static linking is allowed; no .so will be produced.
# Using `defines` (i.e. not `local_defines`) means that no
# dependent rules need to bother about defining the macro.
linkstatic = True,
local_defines = [
# Turn on Large-file Support
"_FILE_OFFSET_BITS=64",
"_LARGEFILE64_SOURCE",
"_LARGEFILE_SOURCE",
],
visibility = ["//visibility:public"],
deps = select({
":perfcounters": ["@libpfm"],
"//conditions:default": [],
}),
)
cc_library(
name = "benchmark_main",
srcs = ["src/benchmark_main.cc"],
hdrs = ["include/benchmark/benchmark.h"],
strip_include_prefix = "include",
hdrs = [
"include/benchmark/benchmark.h",
"include/benchmark/export.h",
],
includes = ["include"],
visibility = ["//visibility:public"],
deps = [":benchmark"],
)

View File

@@ -1,27 +1,34 @@
cmake_minimum_required (VERSION 2.8.12)
# Require CMake 3.10. If available, use the policies up to CMake 3.22.
cmake_minimum_required (VERSION 3.13...3.22)
project (benchmark)
foreach(p
CMP0054 # CMake 3.1
CMP0056 # export EXE_LINKER_FLAGS to try_run
CMP0057 # Support no if() IN_LIST operator
)
if(POLICY ${p})
cmake_policy(SET ${p} NEW)
endif()
endforeach()
project (benchmark VERSION 1.9.4 LANGUAGES CXX)
option(BENCHMARK_ENABLE_TESTING "Enable testing of the benchmark library." ON)
option(BENCHMARK_ENABLE_EXCEPTIONS "Enable the use of exceptions in the benchmark library." ON)
option(BENCHMARK_ENABLE_LTO "Enable link time optimisation of the benchmark library." OFF)
option(BENCHMARK_USE_LIBCXX "Build and test using libc++ as the standard library." OFF)
if(NOT MSVC)
option(BENCHMARK_ENABLE_WERROR "Build Release candidates with -Werror." ON)
option(BENCHMARK_FORCE_WERROR "Build Release candidates with -Werror regardless of compiler issues." OFF)
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
# PGC++ maybe reporting false positives.
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "NVHPC")
set(BENCHMARK_ENABLE_WERROR OFF)
endif()
if(BENCHMARK_FORCE_WERROR)
set(BENCHMARK_ENABLE_WERROR ON)
endif(BENCHMARK_FORCE_WERROR)
if(NOT (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
option(BENCHMARK_BUILD_32_BITS "Build a 32 bit version of the library." OFF)
else()
set(BENCHMARK_BUILD_32_BITS OFF CACHE BOOL "Build a 32 bit version of the library - unsupported when using MSVC)" FORCE)
endif()
option(BENCHMARK_ENABLE_INSTALL "Enable installation of benchmark. (Projects embedding benchmark may want to turn this OFF.)" ON)
option(BENCHMARK_ENABLE_DOXYGEN "Build documentation with Doxygen." OFF)
option(BENCHMARK_INSTALL_DOCS "Enable installation of documentation." ON)
# Allow unmet dependencies to be met using CMake's ExternalProject mechanics, which
# may require downloading the source code.
@@ -30,6 +37,24 @@ option(BENCHMARK_DOWNLOAD_DEPENDENCIES "Allow the downloading and in-tree buildi
# This option can be used to disable building and running unit tests which depend on gtest
# in cases where it is not possible to build or find a valid version of gtest.
option(BENCHMARK_ENABLE_GTEST_TESTS "Enable building the unit tests which depend on gtest" ON)
option(BENCHMARK_USE_BUNDLED_GTEST "Use bundled GoogleTest. If disabled, the find_package(GTest) will be used." ON)
option(BENCHMARK_ENABLE_LIBPFM "Enable performance counters provided by libpfm" OFF)
# Export only public symbols
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)
if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# As of CMake 3.18, CMAKE_SYSTEM_PROCESSOR is not set properly for MSVC and
# cross-compilation (e.g. Host=x86_64, target=aarch64) requires using the
# undocumented, but working variable.
# See https://gitlab.kitware.com/cmake/cmake/-/issues/15170
set(CMAKE_SYSTEM_PROCESSOR ${MSVC_CXX_ARCHITECTURE_ID})
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "ARM")
set(CMAKE_CROSSCOMPILING TRUE)
endif()
endif()
set(ENABLE_ASSEMBLY_TESTS_DEFAULT OFF)
function(should_enable_assembly_tests)
@@ -41,7 +66,7 @@ function(should_enable_assembly_tests)
return()
endif()
endif()
if (MSVC)
if (MSVC OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
return()
elseif(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
return()
@@ -77,29 +102,63 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(GetGitVersion)
get_git_version(GIT_VERSION)
# If no git version can be determined, use the version
# from the project() command
if ("${GIT_VERSION}" STREQUAL "v0.0.0")
set(VERSION "v${benchmark_VERSION}")
else()
set(VERSION "${GIT_VERSION}")
endif()
# Normalize version: drop "v" prefix, replace first "-" with ".",
# drop everything after second "-" (including said "-").
string(STRIP ${VERSION} VERSION)
if(VERSION MATCHES v[^-]*-)
string(REGEX REPLACE "v([^-]*)-([0-9]+)-.*" "\\1.\\2" NORMALIZED_VERSION ${VERSION})
else()
string(REGEX REPLACE "v(.*)" "\\1" NORMALIZED_VERSION ${VERSION})
endif()
# Tell the user what versions we are using
string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" VERSION ${GIT_VERSION})
message(STATUS "Version: ${VERSION}")
message(STATUS "Google Benchmark version: ${VERSION}, normalized to ${NORMALIZED_VERSION}")
# The version of the libraries
set(GENERIC_LIB_VERSION ${VERSION})
string(SUBSTRING ${VERSION} 0 1 GENERIC_LIB_SOVERSION)
set(GENERIC_LIB_VERSION ${NORMALIZED_VERSION})
string(SUBSTRING ${NORMALIZED_VERSION} 0 1 GENERIC_LIB_SOVERSION)
# Import our CMake modules
include(CheckCXXCompilerFlag)
include(AddCXXCompilerFlag)
include(CheckCXXCompilerFlag)
include(CheckLibraryExists)
include(CXXFeatureCheck)
check_library_exists(rt shm_open "" HAVE_LIB_RT)
if (BENCHMARK_BUILD_32_BITS)
add_required_cxx_compiler_flag(-m32)
endif()
set(BENCHMARK_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD ${BENCHMARK_CXX_STANDARD})
set(CMAKE_CXX_STANDARD_REQUIRED YES)
set(CMAKE_CXX_EXTENSIONS OFF)
if (MSVC)
# Turn compiler warnings up to 11
string(REGEX REPLACE "[-/]W[1-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
# MP flag only applies to cl, not cl frontends to other compilers (e.g. clang-cl, icx-cl etc)
if(CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
endif()
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-WX)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-EHs-)
add_cxx_compiler_flag(-EHa-)
@@ -126,45 +185,48 @@ if (MSVC)
set(CMAKE_EXE_LINKER_FLAGS_MINSIZEREL "${CMAKE_EXE_LINKER_FLAGS_MINSIZEREL} /LTCG")
endif()
else()
# Try and enable C++11. Don't use C++14 because it doesn't work in some
# configurations.
add_cxx_compiler_flag(-std=c++11)
if (NOT HAVE_CXX_FLAG_STD_CXX11)
add_cxx_compiler_flag(-std=c++0x)
endif()
# Turn on Large-file Support
add_definitions(-D_FILE_OFFSET_BITS=64)
add_definitions(-D_LARGEFILE64_SOURCE)
add_definitions(-D_LARGEFILE_SOURCE)
# Turn compiler warnings up to 11
if (NOT MSVC)
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
add_cxx_compiler_flag(-Werror RELEASE)
add_cxx_compiler_flag(-Werror RELWITHDEBINFO)
add_cxx_compiler_flag(-Werror MINSIZEREL)
add_cxx_compiler_flag(-pedantic)
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
add_cxx_compiler_flag(-Wno-deprecated RELEASE)
add_cxx_compiler_flag(-Wno-deprecated RELWITHDEBINFO)
add_cxx_compiler_flag(-Wno-deprecated MINSIZEREL)
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
add_cxx_compiler_flag(-Wall)
add_cxx_compiler_flag(-Wextra)
add_cxx_compiler_flag(-Wshadow)
add_cxx_compiler_flag(-Wfloat-equal)
add_cxx_compiler_flag(-Wold-style-cast)
add_cxx_compiler_flag(-Wconversion)
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Werror)
endif()
if (NOT BENCHMARK_ENABLE_TESTING)
# Disable warning when compiling tests as gtest does not use 'override'.
add_cxx_compiler_flag(-Wsuggest-override)
endif()
add_cxx_compiler_flag(-pedantic)
add_cxx_compiler_flag(-pedantic-errors)
add_cxx_compiler_flag(-Wshorten-64-to-32)
add_cxx_compiler_flag(-fstrict-aliasing)
# Disable warnings regarding deprecated parts of the library while building
# and testing those parts of the library.
add_cxx_compiler_flag(-Wno-deprecated-declarations)
if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
# Intel silently ignores '-Wno-deprecated-declarations',
# warning no. 1786 must be explicitly disabled.
# See #631 for rationale.
add_cxx_compiler_flag(-wd1786)
add_cxx_compiler_flag(-fno-finite-math-only)
endif()
# Disable deprecation warnings for release builds (when -Werror is enabled).
if(BENCHMARK_ENABLE_WERROR)
add_cxx_compiler_flag(-Wno-deprecated)
endif()
if (NOT BENCHMARK_ENABLE_EXCEPTIONS)
add_cxx_compiler_flag(-fno-exceptions)
endif()
if (HAVE_CXX_FLAG_FSTRICT_ALIASING)
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") #ICC17u2: Many false positives for Wstrict-aliasing
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") #ICC17u2: Many false positives for Wstrict-aliasing
add_cxx_compiler_flag(-Wstrict-aliasing)
endif()
endif()
@@ -173,21 +235,26 @@ else()
add_cxx_compiler_flag(-wd654)
add_cxx_compiler_flag(-Wthread-safety)
if (HAVE_CXX_FLAG_WTHREAD_SAFETY)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES)
cxx_feature_check(THREAD_SAFETY_ATTRIBUTES "-DINCLUDE_DIRECTORIES=${PROJECT_SOURCE_DIR}/include")
endif()
# On most UNIX like platforms g++ and clang++ define _GNU_SOURCE as a
# predefined macro, which turns on all of the wonderful libc extensions.
# However g++ doesn't do this in Cygwin so we have to define it ourselfs
# However g++ doesn't do this in Cygwin so we have to define it ourselves
# since we depend on GNU/POSIX/BSD extensions.
if (CYGWIN)
add_definitions(-D_GNU_SOURCE=1)
endif()
if (QNXNTO)
add_definitions(-D_QNX_SOURCE)
endif()
# Link time optimisation
if (BENCHMARK_ENABLE_LTO)
add_cxx_compiler_flag(-flto)
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
add_cxx_compiler_flag(-Wno-lto-type-mismatch)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
find_program(GCC_AR gcc-ar)
if (GCC_AR)
set(CMAKE_AR ${GCC_AR})
@@ -196,7 +263,7 @@ else()
if (GCC_RANLIB)
set(CMAKE_RANLIB ${GCC_RANLIB})
endif()
elseif("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
include(llvm-toolchain)
endif()
endif()
@@ -224,7 +291,8 @@ if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
add_cxx_compiler_flag(-stdlib=libc++)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" OR
"${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM")
add_cxx_compiler_flag(-nostdinc++)
message(WARNING "libc++ header path must be manually specified using CMAKE_CXX_FLAGS")
# Adding -nodefaultlibs directly to CMAKE_<TYPE>_LINKER_FLAGS will break
@@ -250,9 +318,16 @@ if (NOT BENCHMARK_ENABLE_EXCEPTIONS AND HAVE_STD_REGEX
AND NOT HAVE_GNU_POSIX_REGEX AND NOT HAVE_POSIX_REGEX)
message(WARNING "Using std::regex with exceptions disabled is not fully supported")
endif()
cxx_feature_check(STEADY_CLOCK)
# Ensure we have pthreads
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
cxx_feature_check(PTHREAD_AFFINITY)
if (BENCHMARK_ENABLE_LIBPFM)
find_package(PFM REQUIRED)
endif()
# Set up directories
include_directories(${PROJECT_SOURCE_DIR}/include)
@@ -262,8 +337,18 @@ add_subdirectory(src)
if (BENCHMARK_ENABLE_TESTING)
enable_testing()
if (BENCHMARK_ENABLE_GTEST_TESTS)
include(HandleGTest)
if (BENCHMARK_ENABLE_GTEST_TESTS AND
NOT (TARGET gtest AND TARGET gtest_main AND
TARGET gmock AND TARGET gmock_main))
if (BENCHMARK_USE_BUNDLED_GTEST)
include(GoogleTest)
else()
find_package(GTest CONFIG REQUIRED)
add_library(gtest ALIAS GTest::gtest)
add_library(gtest_main ALIAS GTest::gtest_main)
add_library(gmock ALIAS GTest::gmock)
add_library(gmock_main ALIAS GTest::gmock_main)
endif()
endif()
add_subdirectory(test)
endif()

View File

@@ -22,44 +22,75 @@
#
# Please keep the list sorted.
Abhina Sreeskantharajan <abhina.sreeskantharajan@ibm.com>
Albert Pretorius <pretoalb@gmail.com>
Alex Steele <steelal123@gmail.com>
Andriy Berestovskyy <berestovskyy@gmail.com>
Arne Beer <arne@twobeer.de>
Bátor Tallér <bator.taller@shapr3d.com>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Cezary Skrzyński <czars1988@gmail.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
Christian Wassermann <christian_wassermann@web.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
Colin Braley <braley.colin@gmail.com>
Cyrille Faucheux <cyrille.faucheux@gmail.com>
Daniel Harvey <danielharvey458@gmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
Deniz Evrenci <denizevrenci@gmail.com>
Dominic Hamon <dma@stripysock.com> <dominic@google.com>
Dominik Czarnota <dominik.b.czarnota@gmail.com>
Dominik Korman <kormandominik@gmail.com>
Donald Aingworth <donalds_junk_mail@yahoo.com>
Doug Evans <xdje42@gmail.com>
Eric Backus <eric_backus@alum.mit.edu>
Eric Fiselier <eric@efcs.ca>
Eugene Zhuk <eugene.zhuk@gmail.com>
Evgeny Safronov <division494@gmail.com>
Fabien Pichot <pichot.fabien@gmail.com>
Fanbo Meng <fanbo.meng@ibm.com>
Federico Ficarelli <federico.ficarelli@gmail.com>
Felix Homann <linuxaudio@showlabor.de>
Geoffrey Martin-Noble <gcmn@google.com> <gmngeoffrey@gmail.com>
Gergely Meszaros <maetveis@gmail.com>
Gergő Szitár <szitar.gergo@gmail.com>
Hannes Hauswedell <h2@fsfe.org>
Henrique Bucher <hbucher@gmail.com>
Ismael Jimenez Martinez <ismael.jimenez.martinez@gmail.com>
Iakov Sergeev <yahontu@gmail.com>
Jern-Kuan Leong <jernkuan@gmail.com>
JianXiong Zhou <zhoujianxiong2@gmail.com>
Joao Paulo Magalhaes <joaoppmagalhaes@gmail.com>
John Millikin <jmillikin@stripe.com>
Jordan Williams <jwillikers@protonmail.com>
Jussi Knuuttila <jussi.knuuttila@gmail.com>
Kaito Udagawa <umireon@gmail.com>
Kai Wolf <kai.wolf@gmail.com>
Kishan Kumar <kumar.kishan@outlook.com>
Kaito Udagawa <umireon@gmail.com>
Lei Xu <eddyxu@gmail.com>
Marcel Jacobse <mjacobse@uni-bremen.de>
Matt Clarkson <mattyclarkson@gmail.com>
Maxim Vafin <maxvafin@gmail.com>
Mike Apodaca <gatorfax@gmail.com>
Min-Yih Hsu <yihshyng223@gmail.com>
Nick Hutchinson <nshutchinson@gmail.com>
Norman Heino <norman.heino@gmail.com>
Oleksandr Sochka <sasha.sochka@gmail.com>
Ori Livneh <ori.livneh@gmail.com>
Pascal Leroy <phl@google.com>
Paul Redmond <paul.redmond@gmail.com>
Pierre Phaneuf <pphaneuf@google.com>
Radoslav Yovchev <radoslav.tm@gmail.com>
Raghu Raja <raghu@enfabrica.net>
Rainer Orth <ro@cebitec.uni-bielefeld.de>
Raul Marin <rmrodriguez@cartodb.com>
Ray Glover <ray.glover@uk.ibm.com>
Robert Guo <robert.guo@mongodb.com>
Roman Lebedev <lebedev.ri@gmail.com>
Sayan Bhattacharjee <aero.sayan@gmail.com>
Shuo Chen <chenshuo@chenshuo.com>
Steven Wan <wan.yu@ibm.com>
Tobias Schmidt <tobias.schmidt@in.tum.de>
Tobias Ulvgård <tobias.ulvgard@dirac.se>
Tom Madams <tom.ej.madams@gmail.com> <tmadams@google.com>
Yixuan Qiu <yixuanq@gmail.com>

41
third_party/benchmark/MODULE.bazel vendored Normal file
View File

@@ -0,0 +1,41 @@
module(
name = "google_benchmark",
version = "1.9.4",
)
bazel_dep(name = "bazel_skylib", version = "1.7.1")
bazel_dep(name = "platforms", version = "0.0.10")
bazel_dep(name = "rules_cc", version = "0.0.9")
bazel_dep(name = "rules_python", version = "1.0.0", dev_dependency = True)
bazel_dep(name = "googletest", version = "1.14.0", dev_dependency = True, repo_name = "com_google_googletest")
bazel_dep(name = "libpfm", version = "4.11.0.bcr.1")
# Register a toolchain for Python 3.9 to be able to build numpy. Python
# versions >=3.10 are problematic.
# A second reason for this is to be able to build Python hermetically instead
# of relying on the changing default version from rules_python.
python = use_extension("@rules_python//python/extensions:python.bzl", "python", dev_dependency = True)
python.toolchain(python_version = "3.8")
python.toolchain(python_version = "3.9")
python.toolchain(python_version = "3.10")
python.toolchain(python_version = "3.11")
python.toolchain(
is_default = True,
python_version = "3.12",
)
python.toolchain(python_version = "3.13")
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip", dev_dependency = True)
pip.parse(
hub_name = "tools_pip_deps",
python_version = "3.9",
requirements_lock = "//tools:requirements.txt",
)
use_repo(pip, "tools_pip_deps")
# -- bazel_dep definitions -- #
bazel_dep(name = "nanobind_bazel", version = "2.7.0", dev_dependency = True)

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,20 @@
workspace(name = "com_github_google_benchmark")
http_archive(
name = "com_google_googletest",
urls = ["https://github.com/google/googletest/archive/3f0cf6b62ad1eb50d8736538363d3580dd640c3e.zip"],
strip_prefix = "googletest-3f0cf6b62ad1eb50d8736538363d3580dd640c3e",
load("//:bazel/benchmark_deps.bzl", "benchmark_deps")
benchmark_deps()
load("@rules_python//python:repositories.bzl", "py_repositories")
py_repositories()
load("@rules_python//python:pip.bzl", "pip_parse")
pip_parse(
name = "tools_pip_deps",
requirements_lock = "//tools:requirements.txt",
)
load("@tools_pip_deps//:requirements.bzl", "install_deps")
install_deps()

View File

@@ -0,0 +1,2 @@
# This file marks the root of the Bazel workspace.
# See MODULE.bazel for dependencies and setup.

2
third_party/benchmark/_config.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
theme: jekyll-theme-midnight
markdown: GFM

View File

@@ -41,7 +41,7 @@ build_script:
- cmake --build . --config %configuration%
test_script:
- ctest -c %configuration% --timeout 300 --output-on-failure
- ctest --build-config %configuration% --timeout 300 --output-on-failure
artifacts:
- path: '_build/CMakeFiles/*.log'

View File

@@ -0,0 +1,54 @@
"""
This file contains the Bazel build dependencies for Google Benchmark (both C++ source and Python bindings).
"""
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
def benchmark_deps():
"""Loads dependencies required to build Google Benchmark."""
if "bazel_skylib" not in native.existing_rules():
http_archive(
name = "bazel_skylib",
sha256 = "cd55a062e763b9349921f0f5db8c3933288dc8ba4f76dd9416aac68acee3cb94",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.5.0/bazel-skylib-1.5.0.tar.gz",
],
)
if "rules_python" not in native.existing_rules():
http_archive(
name = "rules_python",
sha256 = "e85ae30de33625a63eca7fc40a94fea845e641888e52f32b6beea91e8b1b2793",
strip_prefix = "rules_python-0.27.1",
url = "https://github.com/bazelbuild/rules_python/releases/download/0.27.1/rules_python-0.27.1.tar.gz",
)
if "com_google_googletest" not in native.existing_rules():
new_git_repository(
name = "com_google_googletest",
remote = "https://github.com/google/googletest.git",
tag = "release-1.12.1",
)
if "nanobind" not in native.existing_rules():
new_git_repository(
name = "nanobind",
remote = "https://github.com/wjakob/nanobind.git",
tag = "v1.9.2",
build_file = "@//bindings/python:nanobind.BUILD",
recursive_init_submodules = True,
)
if "libpfm" not in native.existing_rules():
# Downloaded from v4.9.0 tag at https://sourceforge.net/p/perfmon2/libpfm4/ref/master/tags/
http_archive(
name = "libpfm",
build_file = str(Label("//tools:libpfm.BUILD.bazel")),
sha256 = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc",
type = "tar.gz",
strip_prefix = "libpfm-4.11.0",
urls = ["https://sourceforge.net/projects/perfmon2/files/libpfm4/libpfm-4.11.0.tar.gz/download"],
)

View File

@@ -0,0 +1,34 @@
load("@nanobind_bazel//:build_defs.bzl", "nanobind_extension", "nanobind_stubgen")
load("@rules_python//python:defs.bzl", "py_library", "py_test")
py_library(
name = "google_benchmark",
srcs = ["__init__.py"],
visibility = ["//visibility:public"],
deps = [
":_benchmark",
],
)
nanobind_extension(
name = "_benchmark",
srcs = ["benchmark.cc"],
deps = ["//:benchmark"],
)
nanobind_stubgen(
name = "benchmark_stubgen",
marker_file = "bindings/python/google_benchmark/py.typed",
module = ":_benchmark",
)
py_test(
name = "example",
srcs = ["example.py"],
python_version = "PY3",
srcs_version = "PY3",
visibility = ["//visibility:public"],
deps = [
":google_benchmark",
],
)

View File

@@ -0,0 +1,145 @@
# Copyright 2020 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Python benchmarking utilities.
Example usage:
import google_benchmark as benchmark
@benchmark.register
def my_benchmark(state):
... # Code executed outside `while` loop is not timed.
while state:
... # Code executed within `while` loop is timed.
if __name__ == '__main__':
benchmark.main()
"""
import atexit
from absl import app
from google_benchmark import _benchmark
from google_benchmark._benchmark import (
Counter as Counter,
State as State,
kMicrosecond as kMicrosecond,
kMillisecond as kMillisecond,
kNanosecond as kNanosecond,
kSecond as kSecond,
o1 as o1,
oAuto as oAuto,
oLambda as oLambda,
oLogN as oLogN,
oN as oN,
oNCubed as oNCubed,
oNLogN as oNLogN,
oNone as oNone,
oNSquared as oNSquared,
)
__version__ = "1.9.4"
class __OptionMaker:
"""A stateless class to collect benchmark options.
Collect all decorator calls like @option.range(start=0, limit=1<<5).
"""
class Options:
"""Pure data class to store options calls, along with the benchmarked
function."""
def __init__(self, func):
self.func = func
self.builder_calls = []
@classmethod
def make(cls, func_or_options):
"""Make Options from Options or the benchmarked function."""
if isinstance(func_or_options, cls.Options):
return func_or_options
return cls.Options(func_or_options)
def __getattr__(self, builder_name):
"""Append option call in the Options."""
# The function that get returned on @option.range(start=0, limit=1<<5).
def __builder_method(*args, **kwargs):
# The decorator that get called, either with the benchmared function
# or the previous Options
def __decorator(func_or_options):
options = self.make(func_or_options)
options.builder_calls.append((builder_name, args, kwargs))
# The decorator returns Options so it is not technically a
# decorator and needs a final call to @register
return options
return __decorator
return __builder_method
# Alias for nicer API.
# We have to instantiate an object, even if stateless, to be able to use
# __getattr__ on option.range
option = __OptionMaker()
def register(undefined=None, *, name=None):
"""Register function for benchmarking."""
if undefined is None:
# Decorator is called without parenthesis so we return a decorator
return lambda f: register(f, name=name)
# We have either the function to benchmark (simple case) or an instance of
# Options (@option._ case).
options = __OptionMaker.make(undefined)
if name is None:
name = options.func.__name__
# We register the benchmark and reproduce all the @option._ calls onto the
# benchmark builder pattern
benchmark = _benchmark.RegisterBenchmark(name, options.func)
for name, args, kwargs in options.builder_calls[::-1]:
getattr(benchmark, name)(*args, **kwargs)
# return the benchmarked function because the decorator does not modify it
return options.func
def _flags_parser(argv):
argv = _benchmark.Initialize(argv)
return app.parse_flags_with_usage(argv)
def _run_benchmarks(argv):
if len(argv) > 1:
raise app.UsageError("Too many command-line arguments.")
return _benchmark.RunSpecifiedBenchmarks()
def main(argv=None):
return app.run(_run_benchmarks, argv=argv, flags_parser=_flags_parser)
# FIXME: can we rerun with disabled ASLR?
# Methods for use with custom main function.
initialize = _benchmark.Initialize
run_benchmarks = _benchmark.RunSpecifiedBenchmarks
atexit.register(_benchmark.ClearRegisteredBenchmarks)

View File

@@ -0,0 +1,184 @@
// Benchmark for Python.
#include "benchmark/benchmark.h"
#include "nanobind/nanobind.h"
#include "nanobind/operators.h"
#include "nanobind/stl/bind_map.h"
#include "nanobind/stl/string.h"
#include "nanobind/stl/vector.h"
NB_MAKE_OPAQUE(benchmark::UserCounters);
namespace {
namespace nb = nanobind;
std::vector<std::string> Initialize(const std::vector<std::string>& argv) {
// The `argv` pointers here become invalid when this function returns, but
// benchmark holds the pointer to `argv[0]`. We create a static copy of it
// so it persists, and replace the pointer below.
static std::string executable_name(argv[0]);
std::vector<char*> ptrs;
ptrs.reserve(argv.size());
for (auto& arg : argv) {
ptrs.push_back(const_cast<char*>(arg.c_str()));
}
ptrs[0] = const_cast<char*>(executable_name.c_str());
int argc = static_cast<int>(argv.size());
benchmark::Initialize(&argc, ptrs.data());
std::vector<std::string> remaining_argv;
remaining_argv.reserve(argc);
for (int i = 0; i < argc; ++i) {
remaining_argv.emplace_back(ptrs[i]);
}
return remaining_argv;
}
benchmark::internal::Benchmark* RegisterBenchmark(const std::string& name,
nb::callable f) {
return benchmark::RegisterBenchmark(
name, [f](benchmark::State& state) { f(&state); });
}
NB_MODULE(_benchmark, m) {
using benchmark::TimeUnit;
nb::enum_<TimeUnit>(m, "TimeUnit")
.value("kNanosecond", TimeUnit::kNanosecond)
.value("kMicrosecond", TimeUnit::kMicrosecond)
.value("kMillisecond", TimeUnit::kMillisecond)
.value("kSecond", TimeUnit::kSecond)
.export_values();
using benchmark::BigO;
nb::enum_<BigO>(m, "BigO")
.value("oNone", BigO::oNone)
.value("o1", BigO::o1)
.value("oN", BigO::oN)
.value("oNSquared", BigO::oNSquared)
.value("oNCubed", BigO::oNCubed)
.value("oLogN", BigO::oLogN)
.value("oNLogN", BigO::oNLogN)
.value("oAuto", BigO::oAuto)
.value("oLambda", BigO::oLambda)
.export_values();
using benchmark::internal::Benchmark;
nb::class_<Benchmark>(m, "Benchmark")
// For methods returning a pointer to the current object, reference
// return policy is used to ask nanobind not to take ownership of the
// returned object and avoid calling delete on it.
// https://pybind11.readthedocs.io/en/stable/advanced/functions.html#return-value-policies
//
// For methods taking a const std::vector<...>&, a copy is created
// because a it is bound to a Python list.
// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html
.def("unit", &Benchmark::Unit, nb::rv_policy::reference)
.def("arg", &Benchmark::Arg, nb::rv_policy::reference)
.def("args", &Benchmark::Args, nb::rv_policy::reference)
.def("range", &Benchmark::Range, nb::rv_policy::reference,
nb::arg("start"), nb::arg("limit"))
.def("dense_range", &Benchmark::DenseRange,
nb::rv_policy::reference, nb::arg("start"),
nb::arg("limit"), nb::arg("step") = 1)
.def("ranges", &Benchmark::Ranges, nb::rv_policy::reference)
.def("args_product", &Benchmark::ArgsProduct,
nb::rv_policy::reference)
.def("arg_name", &Benchmark::ArgName, nb::rv_policy::reference)
.def("arg_names", &Benchmark::ArgNames,
nb::rv_policy::reference)
.def("range_pair", &Benchmark::RangePair,
nb::rv_policy::reference, nb::arg("lo1"), nb::arg("hi1"),
nb::arg("lo2"), nb::arg("hi2"))
.def("range_multiplier", &Benchmark::RangeMultiplier,
nb::rv_policy::reference)
.def("min_time", &Benchmark::MinTime, nb::rv_policy::reference)
.def("min_warmup_time", &Benchmark::MinWarmUpTime,
nb::rv_policy::reference)
.def("iterations", &Benchmark::Iterations,
nb::rv_policy::reference)
.def("repetitions", &Benchmark::Repetitions,
nb::rv_policy::reference)
.def("report_aggregates_only", &Benchmark::ReportAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("display_aggregates_only", &Benchmark::DisplayAggregatesOnly,
nb::rv_policy::reference, nb::arg("value") = true)
.def("measure_process_cpu_time", &Benchmark::MeasureProcessCPUTime,
nb::rv_policy::reference)
.def("use_real_time", &Benchmark::UseRealTime,
nb::rv_policy::reference)
.def("use_manual_time", &Benchmark::UseManualTime,
nb::rv_policy::reference)
.def(
"complexity",
(Benchmark * (Benchmark::*)(benchmark::BigO)) & Benchmark::Complexity,
nb::rv_policy::reference,
nb::arg("complexity") = benchmark::oAuto);
using benchmark::Counter;
nb::class_<Counter> py_counter(m, "Counter");
nb::enum_<Counter::Flags>(py_counter, "Flags", nb::is_arithmetic(), nb::is_flag())
.value("kDefaults", Counter::Flags::kDefaults)
.value("kIsRate", Counter::Flags::kIsRate)
.value("kAvgThreads", Counter::Flags::kAvgThreads)
.value("kAvgThreadsRate", Counter::Flags::kAvgThreadsRate)
.value("kIsIterationInvariant", Counter::Flags::kIsIterationInvariant)
.value("kIsIterationInvariantRate",
Counter::Flags::kIsIterationInvariantRate)
.value("kAvgIterations", Counter::Flags::kAvgIterations)
.value("kAvgIterationsRate", Counter::Flags::kAvgIterationsRate)
.value("kInvert", Counter::Flags::kInvert)
.export_values();
nb::enum_<Counter::OneK>(py_counter, "OneK")
.value("kIs1000", Counter::OneK::kIs1000)
.value("kIs1024", Counter::OneK::kIs1024)
.export_values();
py_counter
.def(nb::init<double, Counter::Flags, Counter::OneK>(),
nb::arg("value") = 0., nb::arg("flags") = Counter::kDefaults,
nb::arg("k") = Counter::kIs1000)
.def("__init__",
([](Counter* c, double value) { new (c) Counter(value); }))
.def_rw("value", &Counter::value)
.def_rw("flags", &Counter::flags)
.def_rw("oneK", &Counter::oneK)
.def(nb::init_implicit<double>());
nb::implicitly_convertible<nb::int_, Counter>();
nb::bind_map<benchmark::UserCounters>(m, "UserCounters");
using benchmark::State;
nb::class_<State>(m, "State")
.def("__bool__", &State::KeepRunning)
.def_prop_ro("keep_running", &State::KeepRunning)
.def("pause_timing", &State::PauseTiming)
.def("resume_timing", &State::ResumeTiming)
.def("skip_with_error", &State::SkipWithError)
.def_prop_ro("error_occurred", &State::error_occurred)
.def("set_iteration_time", &State::SetIterationTime)
.def_prop_rw("bytes_processed", &State::bytes_processed,
&State::SetBytesProcessed)
.def_prop_rw("complexity_n", &State::complexity_length_n,
&State::SetComplexityN)
.def_prop_rw("items_processed", &State::items_processed,
&State::SetItemsProcessed)
.def("set_label", &State::SetLabel)
.def("range", &State::range, nb::arg("pos") = 0)
.def_prop_ro("iterations", &State::iterations)
.def_prop_ro("name", &State::name)
.def_rw("counters", &State::counters)
.def_prop_ro("thread_index", &State::thread_index)
.def_prop_ro("threads", &State::threads);
m.def("Initialize", Initialize);
m.def("RegisterBenchmark", RegisterBenchmark,
nb::rv_policy::reference);
m.def("RunSpecifiedBenchmarks",
[]() { benchmark::RunSpecifiedBenchmarks(); });
m.def("ClearRegisteredBenchmarks", benchmark::ClearRegisteredBenchmarks);
};
} // namespace

View File

@@ -0,0 +1,140 @@
# Copyright 2020 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Example of Python using C++ benchmark framework.
To run this example, you must first install the `google_benchmark` Python
package.
To install using `setup.py`, download and extract the `google_benchmark` source.
In the extracted directory, execute:
python setup.py install
"""
import random
import time
import google_benchmark as benchmark
from google_benchmark import Counter
@benchmark.register
def empty(state):
while state:
pass
@benchmark.register
def sum_million(state):
while state:
sum(range(1_000_000))
@benchmark.register
def pause_timing(state):
"""Pause timing every iteration."""
while state:
# Construct a list of random ints every iteration without timing it
state.pause_timing()
random_list = [random.randint(0, 100) for _ in range(100)]
state.resume_timing()
# Time the in place sorting algorithm
random_list.sort()
@benchmark.register
def skipped(state):
if True: # Test some predicate here.
state.skip_with_error("some error")
return # NOTE: You must explicitly return, or benchmark will continue.
# Benchmark code would be here.
@benchmark.register
@benchmark.option.use_manual_time()
def manual_timing(state):
while state:
# Manually count Python CPU time
start = time.perf_counter() # perf_counter_ns() in Python 3.7+
# Something to benchmark
time.sleep(0.01)
end = time.perf_counter()
state.set_iteration_time(end - start)
@benchmark.register
def custom_counters(state):
"""Collect custom metric using benchmark.Counter."""
num_foo = 0.0
while state:
# Benchmark some code here
# Collect some custom metric named foo
num_foo += 0.13
# Automatic Counter from numbers.
state.counters["foo"] = num_foo
# Set a counter as a rate.
state.counters["foo_rate"] = Counter(num_foo, Counter.kIsRate)
# Set a counter as an inverse of rate.
state.counters["foo_inv_rate"] = Counter(
num_foo, Counter.kIsRate | Counter.kInvert
)
# Set a counter as a thread-average quantity.
state.counters["foo_avg"] = Counter(num_foo, Counter.kAvgThreads)
# There's also a combined flag:
state.counters["foo_avg_rate"] = Counter(num_foo, Counter.kAvgThreadsRate)
@benchmark.register
@benchmark.option.measure_process_cpu_time()
@benchmark.option.use_real_time()
def with_options(state):
while state:
sum(range(1_000_000))
@benchmark.register(name="sum_million_microseconds")
@benchmark.option.unit(benchmark.kMicrosecond)
def with_options2(state):
while state:
sum(range(1_000_000))
@benchmark.register
@benchmark.option.arg(100)
@benchmark.option.arg(1000)
def passing_argument(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range(8, limit=8 << 10)
def using_range(state):
while state:
sum(range(state.range(0)))
@benchmark.register
@benchmark.option.range_multiplier(2)
@benchmark.option.range(1 << 10, 1 << 18)
@benchmark.option.complexity(benchmark.oN)
def computing_complexity(state):
while state:
sum(range(state.range(0)))
state.complexity_n = state.range(0)
if __name__ == "__main__":
benchmark.main()

View File

@@ -34,9 +34,11 @@ function(add_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${BENCHMARK_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
endif()
@@ -49,9 +51,11 @@ function(add_required_cxx_compiler_flag FLAG)
check_cxx_compiler_flag("${FLAG}" ${MANGLED_FLAG})
set(CMAKE_REQUIRED_FLAGS "${OLD_CMAKE_REQUIRED_FLAGS}")
if(${MANGLED_FLAG})
set(VARIANT ${ARGV1})
if(ARGV1)
if(ARGC GREATER 1)
set(VARIANT ${ARGV1})
string(TOUPPER "_${VARIANT}" VARIANT)
else()
set(VARIANT "")
endif()
set(CMAKE_CXX_FLAGS${VARIANT} "${CMAKE_CXX_FLAGS${VARIANT}} ${FLAG}" PARENT_SCOPE)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}" PARENT_SCOPE)

View File

@@ -17,6 +17,8 @@ if(__cxx_feature_check)
endif()
set(__cxx_feature_check INCLUDED)
option(CXXFEATURECHECK_DEBUG OFF)
function(cxx_feature_check FILE)
string(TOLOWER ${FILE} FILE)
string(TOUPPER ${FILE} VAR)
@@ -27,26 +29,38 @@ function(cxx_feature_check FILE)
return()
endif()
set(FEATURE_CHECK_CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS})
if (ARGC GREATER 1)
message(STATUS "Enabling additional flags: ${ARGV1}")
list(APPEND FEATURE_CHECK_CMAKE_FLAGS ${ARGV1})
endif()
if (NOT DEFINED COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE}")
if(CMAKE_CROSSCOMPILING)
message(STATUS "Cross-compiling to test ${FEATURE}")
try_compile(COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
if(COMPILE_${FEATURE})
message(WARNING
"If you see build failures due to cross compilation, try setting HAVE_${VAR} to 0")
set(RUN_${FEATURE} 0)
set(RUN_${FEATURE} 0 CACHE INTERNAL "")
else()
set(RUN_${FEATURE} 1)
set(RUN_${FEATURE} 1 CACHE INTERNAL "")
endif()
else()
message(STATUS "Performing Test ${FEATURE}")
message(STATUS "Compiling and running to test ${FEATURE}")
try_run(RUN_${FEATURE} COMPILE_${FEATURE}
${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/${FILE}.cpp
CMAKE_FLAGS ${BENCHMARK_CXX_LINKER_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES})
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CMAKE_FLAGS ${FEATURE_CHECK_CMAKE_FLAGS}
LINK_LIBRARIES ${BENCHMARK_CXX_LIBRARIES}
COMPILE_OUTPUT_VARIABLE COMPILE_OUTPUT_VAR)
endif()
endif()
@@ -56,7 +70,11 @@ function(cxx_feature_check FILE)
add_definitions(-DHAVE_${VAR})
else()
if(NOT COMPILE_${FEATURE})
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
if(CXXFEATURECHECK_DEBUG)
message(STATUS "Performing Test ${FEATURE} -- failed to compile: ${COMPILE_OUTPUT_VAR}")
else()
message(STATUS "Performing Test ${FEATURE} -- failed to compile")
endif()
else()
message(STATUS "Performing Test ${FEATURE} -- compiled but failed to run")
endif()

View File

@@ -1 +1,12 @@
@PACKAGE_INIT@
include (CMakeFindDependencyMacro)
find_dependency (Threads)
if (@BENCHMARK_ENABLE_LIBPFM@)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
find_dependency (PFM)
endif()
include("${CMAKE_CURRENT_LIST_DIR}/@targets_export_name@.cmake")

View File

@@ -20,35 +20,17 @@ set(__get_git_version INCLUDED)
function(get_git_version var)
if(GIT_EXECUTABLE)
execute_process(COMMAND ${GIT_EXECUTABLE} describe --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8
execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --match "v[0-9]*.[0-9]*.[0-9]*" --abbrev=8 --dirty
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
RESULT_VARIABLE status
OUTPUT_VARIABLE GIT_VERSION
ERROR_QUIET)
if(${status})
if(status)
set(GIT_VERSION "v0.0.0")
else()
string(STRIP ${GIT_VERSION} GIT_VERSION)
string(REGEX REPLACE "-[0-9]+-g" "-" GIT_VERSION ${GIT_VERSION})
endif()
# Work out if the repository is dirty
execute_process(COMMAND ${GIT_EXECUTABLE} update-index -q --refresh
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_QUIET
ERROR_QUIET)
execute_process(COMMAND ${GIT_EXECUTABLE} diff-index --name-only HEAD --
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GIT_DIFF_INDEX
ERROR_QUIET)
string(COMPARE NOTEQUAL "${GIT_DIFF_INDEX}" "" GIT_DIRTY)
if (${GIT_DIRTY})
set(GIT_VERSION "${GIT_VERSION}-dirty")
endif()
else()
set(GIT_VERSION "v0.0.0")
endif()
message(STATUS "git Version: ${GIT_VERSION}")
set(${var} ${GIT_VERSION} PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,58 @@
# Download and unpack googletest at configure time
set(GOOGLETEST_PREFIX "${benchmark_BINARY_DIR}/third_party/googletest")
configure_file(${benchmark_SOURCE_DIR}/cmake/GoogleTest.cmake.in ${GOOGLETEST_PREFIX}/CMakeLists.txt @ONLY)
set(GOOGLETEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/googletest" CACHE PATH "") # Mind the quotes
execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
-DALLOW_DOWNLOADING_GOOGLETEST=${BENCHMARK_DOWNLOAD_DEPENDENCIES} -DGOOGLETEST_PATH:PATH=${GOOGLETEST_PATH} .
RESULT_VARIABLE result
WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)
if(result)
message(FATAL_ERROR "CMake step for googletest failed: ${result}")
endif()
execute_process(
COMMAND ${CMAKE_COMMAND} --build .
RESULT_VARIABLE result
WORKING_DIRECTORY ${GOOGLETEST_PREFIX}
)
if(result)
message(FATAL_ERROR "Build step for googletest failed: ${result}")
endif()
# Prevent overriding the parent project's compiler/linker
# settings on Windows
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
include(${GOOGLETEST_PREFIX}/googletest-paths.cmake)
# Add googletest directly to our build. This defines
# the gtest and gtest_main targets.
add_subdirectory(${GOOGLETEST_SOURCE_DIR}
${GOOGLETEST_BINARY_DIR}
EXCLUDE_FROM_ALL)
# googletest doesn't seem to want to stay build warning clean so let's not hurt ourselves.
if (MSVC)
target_compile_options(gtest PRIVATE "/wd4244" "/wd4722")
target_compile_options(gtest_main PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock PRIVATE "/wd4244" "/wd4722")
target_compile_options(gmock_main PRIVATE "/wd4244" "/wd4722")
else()
target_compile_options(gtest PRIVATE "-w")
target_compile_options(gtest_main PRIVATE "-w")
target_compile_options(gmock PRIVATE "-w")
target_compile_options(gmock_main PRIVATE "-w")
endif()
if(NOT DEFINED GTEST_COMPILE_COMMANDS)
set(GTEST_COMPILE_COMMANDS ON)
endif()
set_target_properties(gtest PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gtest_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gtest_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})
set_target_properties(gmock_main PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES $<TARGET_PROPERTY:gmock_main,INTERFACE_INCLUDE_DIRECTORIES> EXPORT_COMPILE_COMMANDS ${GTEST_COMPILE_COMMANDS})

View File

@@ -0,0 +1,60 @@
cmake_minimum_required (VERSION 3.13...3.22)
project(googletest-download NONE)
# Enable ExternalProject CMake module
include(ExternalProject)
option(ALLOW_DOWNLOADING_GOOGLETEST "If googletest src tree is not found in location specified by GOOGLETEST_PATH, do fetch the archive from internet" OFF)
set(GOOGLETEST_PATH "/usr/src/googletest" CACHE PATH
"Path to the googletest root tree. Should contain googletest and googlemock subdirs. And CMakeLists.txt in root, and in both of these subdirs")
# Download and install GoogleTest
message(STATUS "Looking for Google Test sources")
message(STATUS "Looking for Google Test sources in ${GOOGLETEST_PATH}")
if(EXISTS "${GOOGLETEST_PATH}" AND IS_DIRECTORY "${GOOGLETEST_PATH}" AND EXISTS "${GOOGLETEST_PATH}/CMakeLists.txt" AND
EXISTS "${GOOGLETEST_PATH}/googletest" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googletest" AND EXISTS "${GOOGLETEST_PATH}/googletest/CMakeLists.txt" AND
EXISTS "${GOOGLETEST_PATH}/googlemock" AND IS_DIRECTORY "${GOOGLETEST_PATH}/googlemock" AND EXISTS "${GOOGLETEST_PATH}/googlemock/CMakeLists.txt")
message(STATUS "Found Google Test in ${GOOGLETEST_PATH}")
ExternalProject_Add(
googletest
PREFIX "${CMAKE_BINARY_DIR}"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
SOURCE_DIR "${GOOGLETEST_PATH}" # use existing src dir.
BINARY_DIR "${CMAKE_BINARY_DIR}/build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
else()
if(NOT ALLOW_DOWNLOADING_GOOGLETEST)
message(SEND_ERROR "Did not find Google Test sources! Either pass correct path in GOOGLETEST_PATH, or enable BENCHMARK_DOWNLOAD_DEPENDENCIES, or disable BENCHMARK_USE_BUNDLED_GTEST, or disable BENCHMARK_ENABLE_GTEST_TESTS / BENCHMARK_ENABLE_TESTING.")
return()
else()
message(STATUS "Did not find Google Test sources! Fetching from web...")
ExternalProject_Add(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG "v1.15.2"
GIT_SHALLOW "ON"
PREFIX "${CMAKE_BINARY_DIR}"
STAMP_DIR "${CMAKE_BINARY_DIR}/stamp"
DOWNLOAD_DIR "${CMAKE_BINARY_DIR}/download"
SOURCE_DIR "${CMAKE_BINARY_DIR}/src"
BINARY_DIR "${CMAKE_BINARY_DIR}/build"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
endif()
endif()
ExternalProject_Get_Property(googletest SOURCE_DIR BINARY_DIR)
file(WRITE googletest-paths.cmake
"set(GOOGLETEST_SOURCE_DIR \"${SOURCE_DIR}\")
set(GOOGLETEST_BINARY_DIR \"${BINARY_DIR}\")
")

View File

@@ -1,113 +0,0 @@
include(split_list)
macro(build_external_gtest)
include(ExternalProject)
set(GTEST_FLAGS "")
if (BENCHMARK_USE_LIBCXX)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
list(APPEND GTEST_FLAGS -stdlib=libc++)
else()
message(WARNING "Unsupported compiler (${CMAKE_CXX_COMPILER}) when using libc++")
endif()
endif()
if (BENCHMARK_BUILD_32_BITS)
list(APPEND GTEST_FLAGS -m32)
endif()
if (NOT "${CMAKE_CXX_FLAGS}" STREQUAL "")
list(APPEND GTEST_FLAGS ${CMAKE_CXX_FLAGS})
endif()
string(TOUPPER "${CMAKE_BUILD_TYPE}" GTEST_BUILD_TYPE)
if ("${GTEST_BUILD_TYPE}" STREQUAL "COVERAGE")
set(GTEST_BUILD_TYPE "DEBUG")
endif()
# FIXME: Since 10/Feb/2017 the googletest trunk has had a bug where
# -Werror=unused-function fires during the build on OS X. This is a temporary
# workaround to keep our travis bots from failing. It should be removed
# once gtest is fixed.
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
list(APPEND GTEST_FLAGS "-Wno-unused-function")
endif()
split_list(GTEST_FLAGS)
set(EXCLUDE_FROM_ALL_OPT "")
set(EXCLUDE_FROM_ALL_VALUE "")
if (${CMAKE_VERSION} VERSION_GREATER "3.0.99")
set(EXCLUDE_FROM_ALL_OPT "EXCLUDE_FROM_ALL")
set(EXCLUDE_FROM_ALL_VALUE "ON")
endif()
ExternalProject_Add(googletest
${EXCLUDE_FROM_ALL_OPT} ${EXCLUDE_FROM_ALL_VALUE}
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG master
PREFIX "${CMAKE_BINARY_DIR}/googletest"
INSTALL_DIR "${CMAKE_BINARY_DIR}/googletest"
CMAKE_CACHE_ARGS
-DCMAKE_BUILD_TYPE:STRING=${GTEST_BUILD_TYPE}
-DCMAKE_C_COMPILER:STRING=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER:STRING=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
-DCMAKE_INSTALL_LIBDIR:PATH=<INSTALL_DIR>/lib
-DCMAKE_CXX_FLAGS:STRING=${GTEST_FLAGS}
-Dgtest_force_shared_crt:BOOL=ON
)
ExternalProject_Get_Property(googletest install_dir)
set(GTEST_INCLUDE_DIRS ${install_dir}/include)
file(MAKE_DIRECTORY ${GTEST_INCLUDE_DIRS})
set(LIB_SUFFIX "${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(LIB_PREFIX "${CMAKE_STATIC_LIBRARY_PREFIX}")
if("${GTEST_BUILD_TYPE}" STREQUAL "DEBUG")
set(LIB_SUFFIX "d${CMAKE_STATIC_LIBRARY_SUFFIX}")
endif()
# Use gmock_main instead of gtest_main because it initializes gtest as well.
# Note: The libraries are listed in reverse order of their dependancies.
foreach(LIB gtest gmock gmock_main)
add_library(${LIB} UNKNOWN IMPORTED)
set_target_properties(${LIB} PROPERTIES
IMPORTED_LOCATION ${install_dir}/lib/${LIB_PREFIX}${LIB}${LIB_SUFFIX}
INTERFACE_INCLUDE_DIRECTORIES ${GTEST_INCLUDE_DIRS}
INTERFACE_LINK_LIBRARIES "${GTEST_BOTH_LIBRARIES}"
)
add_dependencies(${LIB} googletest)
list(APPEND GTEST_BOTH_LIBRARIES ${LIB})
endforeach()
endmacro(build_external_gtest)
if (BENCHMARK_ENABLE_GTEST_TESTS)
if (IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/googletest)
set(GTEST_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/googletest")
set(INSTALL_GTEST OFF CACHE INTERNAL "")
set(INSTALL_GMOCK OFF CACHE INTERNAL "")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/googletest)
set(GTEST_BOTH_LIBRARIES gtest gmock gmock_main)
foreach(HEADER test mock)
# CMake 2.8 and older don't respect INTERFACE_INCLUDE_DIRECTORIES, so we
# have to add the paths ourselves.
set(HFILE g${HEADER}/g${HEADER}.h)
set(HPATH ${GTEST_ROOT}/google${HEADER}/include)
find_path(HEADER_PATH_${HEADER} ${HFILE}
NO_DEFAULT_PATHS
HINTS ${HPATH}
)
if (NOT HEADER_PATH_${HEADER})
message(FATAL_ERROR "Failed to find header ${HFILE} in ${HPATH}")
endif()
list(APPEND GTEST_INCLUDE_DIRS ${HEADER_PATH_${HEADER}})
endforeach()
elseif(BENCHMARK_DOWNLOAD_DEPENDENCIES)
build_external_gtest()
else()
find_package(GTest REQUIRED)
find_path(GMOCK_INCLUDE_DIRS gmock/gmock.h
HINTS ${GTEST_INCLUDE_DIRS})
if (NOT GMOCK_INCLUDE_DIRS)
message(FATAL_ERROR "Failed to find header gmock/gmock.h with hint ${GTEST_INCLUDE_DIRS}")
endif()
set(GTEST_INCLUDE_DIRS ${GTEST_INCLUDE_DIRS} ${GMOCK_INCLUDE_DIRS})
# FIXME: We don't currently require the gmock library to build the tests,
# and it's likely we won't find it, so we don't try. As long as we've
# found the gmock/gmock.h header and gtest_main that should be good enough.
endif()
endif()

View File

@@ -1,11 +1,12 @@
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${prefix}/lib
includedir=${prefix}/include
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework
Version: @VERSION@
Version: @NORMALIZED_VERSION@
Libs: -L${libdir} -lbenchmark
Libs.private: -lpthread @BENCHMARK_PRIVATE_LINK_LIBRARIES@
Cflags: -I${includedir}

View File

@@ -0,0 +1,7 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
Name: @PROJECT_NAME@
Description: Google microbenchmark framework (with main() function)
Version: @NORMALIZED_VERSION@
Requires: benchmark
Libs: -L${libdir} -lbenchmark_main

View File

@@ -0,0 +1,16 @@
#include <pthread.h>
int main() {
cpu_set_t set;
CPU_ZERO(&set);
for (int i = 0; i < CPU_SETSIZE; ++i) {
CPU_SET(i, &set);
CPU_CLR(i, &set);
}
pthread_t self = pthread_self();
int ret;
ret = pthread_getaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
ret = pthread_setaffinity_np(self, sizeof(set), &set);
if (ret != 0) return ret;
return 0;
}

View File

@@ -111,6 +111,7 @@ between compilers or compiler versions. A common example of this
is matching stack frame addresses. In this case regular expressions
can be used to match the differing bits of output. For example:
<!-- {% raw %} -->
```c++
int ExternInt;
struct Point { int x, y, z; };
@@ -127,6 +128,7 @@ extern "C" void test_store_point() {
// CHECK: ret
}
```
<!-- {% endraw %} -->
## Current Requirements and Limitations

View File

@@ -0,0 +1,3 @@
theme: jekyll-theme-minimal
logo: /assets/images/icon_black.png
show_downloads: true

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

View File

@@ -0,0 +1,19 @@
# Build tool dependency policy
We follow the [Foundational C++ support policy](https://opensource.google/documentation/policies/cplusplus-support) for our build tools. In
particular the ["Build Systems" section](https://opensource.google/documentation/policies/cplusplus-support#build-systems).
## CMake
The current supported version is CMake 3.10 as of 2023-08-10. Most modern
distributions include newer versions, for example:
* Ubuntu 20.04 provides CMake 3.16.3
* Debian 11.4 provides CMake 3.18.4
* Ubuntu 22.04 provides CMake 3.22.1
## Python
The Python bindings require Python 3.10+ as of v1.9.0 (2024-08-16) for installation from PyPI.
Building from source for older versions probably still works, though. See the [user guide](python_bindings.md) for details on how to build from source.
The minimum theoretically supported version is Python 3.8, since the used bindings generator (nanobind) only supports Python 3.8+.

12
third_party/benchmark/docs/index.md vendored Normal file
View File

@@ -0,0 +1,12 @@
# Benchmark
* [Assembly Tests](AssemblyTests.md)
* [Dependencies](dependencies.md)
* [Perf Counters](perf_counters.md)
* [Platform Specific Build Instructions](platform_specific_build_instructions.md)
* [Python Bindings](python_bindings.md)
* [Random Interleaving](random_interleaving.md)
* [Reducing Variance](reducing_variance.md)
* [Releasing](releasing.md)
* [Tools](tools.md)
* [User Guide](user_guide.md)

View File

@@ -0,0 +1,35 @@
<a name="perf-counters" />
# User-Requested Performance Counters
When running benchmarks, the user may choose to request collection of
performance counters. This may be useful in investigation scenarios - narrowing
down the cause of a regression; or verifying that the underlying cause of a
performance improvement matches expectations.
This feature is available if:
* The benchmark is run on an architecture featuring a Performance Monitoring
Unit (PMU),
* The benchmark is compiled with support for collecting counters. Currently,
this requires [libpfm](http://perfmon2.sourceforge.net/), which is built as a
dependency via Bazel.
The feature does not require modifying benchmark code. Counter collection is
handled at the boundaries where timer collection is also handled.
To opt-in:
* If using a Bazel build, add `--define pfm=1` to your build flags
* If using CMake:
* Install `libpfm4-dev`, e.g. `apt-get install libpfm4-dev`.
* Enable the CMake flag `BENCHMARK_ENABLE_LIBPFM` in `CMakeLists.txt`.
To use, pass a comma-separated list of counter names through the
`--benchmark_perf_counters` flag. The names are decoded through libpfm - meaning,
they are platform specific, but some (e.g. `CYCLES` or `INSTRUCTIONS`) are
mapped by libpfm to platform-specifics - see libpfm
[documentation](http://perfmon2.sourceforge.net/docs.html) for more details.
The counter values are reported back through the [User Counters](../README.md#custom-counters)
mechanism, meaning, they are available in all the formats (e.g. JSON) supported
by User Counters.

View File

@@ -0,0 +1,52 @@
# Platform Specific Build Instructions
## Building with GCC
When the library is built using GCC it is necessary to link with the pthread
library due to how GCC implements `std::thread`. Failing to link to pthread will
lead to runtime exceptions (unless you're using libc++), not linker errors. See
[issue #67](https://github.com/google/benchmark/issues/67) for more details. You
can link to pthread by adding `-pthread` to your linker command. Note, you can
also use `-lpthread`, but there are potential issues with ordering of command
line parameters if you use that.
On QNX, the pthread library is part of libc and usually included automatically
(see
[`pthread_create()`](https://www.qnx.com/developers/docs/7.1/index.html#com.qnx.doc.neutrino.lib_ref/topic/p/pthread_create.html)).
There's no separate pthread library to link.
## Building with Visual Studio 2015, 2017 or 2022
The `shlwapi` library (`-lshlwapi`) is required to support a call to `CPUInfo` which reads the registry. Either add `shlwapi.lib` under `[ Configuration Properties > Linker > Input ]`, or use the following:
```
// Alternatively, can add libraries using linker options.
// First, Add the path to the generated library files (directory containing the `benchmark.lib`) in `[Configuration Properties > Linker > General > Additional Library Directories]`. Then do the following:
#ifdef _WIN32
#pragma comment ( lib, "Shlwapi.lib" )
#ifdef _DEBUG
#pragma comment ( lib, "benchmark.lib" )
#else
#pragma comment ( lib, "benchmark.lib" )
#endif
#endif
```
When using the static library, make sure to add `BENCHMARK_STATIC_DEFINE` under `[Configuration Properties > C/C++ > Preprocessor > Preprocessor Definitions]`
Can also use the graphical version of CMake:
* Open `CMake GUI`.
* Under `Where to build the binaries`, same path as source plus `build`.
* Under `CMAKE_INSTALL_PREFIX`, same path as source plus `install`.
* Click `Configure`, `Generate`, `Open Project`.
* If build fails, try deleting entire directory and starting again, or unticking options to build less.
## Building with Intel 2015 Update 1 or Intel System Studio Update 4
See instructions for building with Visual Studio. Once built, right click on the solution and change the build to Intel.
## Building on Solaris
If you're running benchmarks on solaris, you'll want the kstat library linked in
too (`-lkstat`).

View File

@@ -0,0 +1,34 @@
# Building and installing Python bindings
Python bindings are available as wheels on [PyPI](https://pypi.org/project/google-benchmark/) for importing and
using Google Benchmark directly in Python.
Currently, pre-built wheels exist for macOS (both ARM64 and Intel x86), Linux x86-64 and 64-bit Windows.
Supported Python versions are Python 3.8 - 3.12.
To install Google Benchmark's Python bindings, run:
```bash
python -m pip install --upgrade pip # for manylinux2014 support
python -m pip install google-benchmark
```
In order to keep your system Python interpreter clean, it is advisable to run these commands in a virtual
environment. See the [official Python documentation](https://docs.python.org/3/library/venv.html)
on how to create virtual environments.
To build a wheel directly from source, you can follow these steps:
```bash
git clone https://github.com/google/benchmark.git
cd benchmark
# create a virtual environment and activate it
python3 -m venv venv --system-site-packages
source venv/bin/activate # .\venv\Scripts\Activate.ps1 on Windows
# upgrade Python's system-wide packages
python -m pip install --upgrade pip build
# builds the wheel and stores it in the directory "dist".
python -m build
```
NB: Building wheels from source requires Bazel. For platform-specific instructions on how to install Bazel,
refer to the [Bazel installation docs](https://bazel.build/install).

View File

@@ -0,0 +1,13 @@
<a name="interleaving" />
# Random Interleaving
[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It randomly interleaves repetitions of a
microbenchmark with repetitions from other microbenchmarks in the same benchmark
test. Data shows it is able to lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.
To use, you mainly need to set `--benchmark_enable_random_interleaving=true`,
and optionally specify non-zero repetition count `--benchmark_repetitions=9`
and optionally decrease the per-repetition time `--benchmark_min_time=0.1`.

View File

@@ -0,0 +1,133 @@
# Reducing Variance
<a name="disabling-cpu-frequency-scaling" />
## Disabling CPU Frequency Scaling
If you see this error:
```
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead.
```
you might want to disable the CPU frequency scaling while running the
benchmark, as well as consider other ways to stabilize the performance of
your system while benchmarking.
Exactly how to do this depends on the Linux distribution,
desktop environment, and installed programs. Specific details are a moving
target, so we will not attempt to exhaustively document them here.
One simple option is to use the `cpupower` program to change the
performance governor to "performance". This tool is maintained along with
the Linux kernel and provided by your distribution.
It must be run as root, like this:
```bash
sudo cpupower frequency-set --governor performance
```
After this you can verify that all CPUs are using the performance governor
by running this command:
```bash
cpupower frequency-info -o proc
```
The benchmarks you subsequently run will have less variance.
<a name="reducing-variance" />
## Disabling ASLR
If you see this error:
```
***WARNING*** ASLR is enabled, the results may have unreproducible noise in them.
```
you might want to disable the ASLR security hardening feature while running the
benchmark.
The simplest way is to add
```
benchmark::MaybeReenterWithoutASLR(argc, argv);
```
as the first line of your `main()` function. It will try to disable ASLR
for the current processor, and, if successful, re-execute the binary.
Note that `personality(2)` may be forbidden by e.g. seccomp (which happens
by default if you are running in a Docker container).
Note that if you link to `benchmark_main` already does that for you.
To globally disable ASLR on Linux, run
```
echo 0 > /proc/sys/kernel/randomize_va_space
```
To run a single benchmark with ASLR disabled on Linux, do:
```
setarch `uname -m` -R ./a_benchmark
```
Note that for the information on how to disable ASLR on other operating systems,
please refer to their documentation.
## Reducing Variance in Benchmarks
The Linux CPU frequency governor [discussed
above](user_guide#disabling-cpu-frequency-scaling) is not the only source
of noise in benchmarks. Some, but not all, of the sources of variance
include:
1. On multi-core machines not all CPUs/CPU cores/CPU threads run the same
speed, so running a benchmark one time and then again may give a
different result depending on which CPU it ran on.
2. CPU scaling features that run on the CPU, like Intel's Turbo Boost and
AMD Turbo Core and Precision Boost, can temporarily change the CPU
frequency even when the using the "performance" governor on Linux.
3. Context switching between CPUs, or scheduling competition on the CPU the
benchmark is running on.
4. Intel Hyperthreading or AMD SMT causing the same issue as above.
5. Cache effects caused by code running on other CPUs.
6. Non-uniform memory architectures (NUMA).
These can cause variance in benchmarks results within a single run
(`--benchmark_repetitions=N`) or across multiple runs of the benchmark
program.
Reducing sources of variance is OS and architecture dependent, which is one
reason some companies maintain machines dedicated to performance testing.
Some of the easier and effective ways of reducing variance on a typical
Linux workstation are:
1. Use the performance governor as [discussed
above](user_guide#disabling-cpu-frequency-scaling).
1. Disable processor boosting by:
```sh
echo 0 | sudo tee /sys/devices/system/cpu/cpufreq/boost
```
See the Linux kernel's
[boost.txt](https://www.kernel.org/doc/Documentation/cpu-freq/boost.txt)
for more information.
2. Set the benchmark program's task affinity to a fixed cpu. For example:
```sh
taskset -c 0 ./mybenchmark
```
3. Disabling Hyperthreading/SMT. This can be done in the Bios or using the
`/sys` file system (see the LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html)).
4. Close other programs that do non-trivial things based on timers, such as
your web browser, desktop environment, etc.
5. Reduce the working set of your benchmark to fit within the L1 cache, but
do be aware that this may lead you to optimize for an unrealistic
situation.
Further resources on this topic:
1. The LLVM project's [Benchmarking
tips](https://llvm.org/docs/Benchmarking.html).
1. The Arch Wiki [Cpu frequency
scaling](https://wiki.archlinux.org/title/CPU_frequency_scaling) page.

38
third_party/benchmark/docs/releasing.md vendored Normal file
View File

@@ -0,0 +1,38 @@
# How to release
* Make sure you're on main and synced to HEAD
* Ensure the project builds and tests run
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create one last commit that updates the version saved in `CMakeLists.txt`, `MODULE.bazel`,
and `bindings/python/google_benchmark/__init__.py` to the release version you're creating.
(This version will be used if benchmark is installed from the archive you'll be creating
in the next step.)
```
# CMakeLists.txt
project (benchmark VERSION 1.9.0 LANGUAGES CXX)
```
```
# MODULE.bazel
module(name = "com_github_google_benchmark", version="1.9.0")
```
```
# google_benchmark/__init__.py
__version__ = "1.9.0"
```
* Create a release through github's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force --tags origin`
* Confirm that the "Build and upload Python wheels" action runs to completion
* Run it manually if it hasn't run.

View File

@@ -4,7 +4,11 @@
The `compare.py` can be used to compare the result of benchmarks.
**NOTE**: the utility relies on the scipy package which can be installed using [these instructions](https://www.scipy.org/install.html).
### Dependencies
The utility relies on the [scipy](https://www.scipy.org) package which can be installed using pip:
```bash
pip3 install -r requirements.txt
```
### Displaying aggregates only
@@ -182,6 +186,146 @@ Benchmark Time CPU Time Old
This is a mix of the previous two modes, two (potentially different) benchmark binaries are run, and a different filter is applied to each one.
As you can note, the values in `Time` and `CPU` columns are calculated as `(new - old) / |old|`.
### Note: Interpreting the output
Performance measurements are an art, and performance comparisons are doubly so.
Results are often noisy and don't necessarily have large absolute differences to
them, so just by visual inspection, it is not at all apparent if two
measurements are actually showing a performance change or not. It is even more
confusing with multiple benchmark repetitions.
Thankfully, what we can do, is use statistical tests on the results to determine
whether the performance has statistically-significantly changed. `compare.py`
uses [MannWhitney U
test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test), with a null
hypothesis being that there's no difference in performance.
**The below output is a summary of a benchmark comparison with statistics
provided for a multi-threaded process.**
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
-----------------------------------------------------------------------------------------------------------------------------
benchmark/threads:1/process_time/real_time_pvalue 0.0000 0.0000 U Test, Repetitions: 27 vs 27
benchmark/threads:1/process_time/real_time_mean -0.1442 -0.1442 90 77 90 77
benchmark/threads:1/process_time/real_time_median -0.1444 -0.1444 90 77 90 77
benchmark/threads:1/process_time/real_time_stddev +0.3974 +0.3933 0 0 0 0
benchmark/threads:1/process_time/real_time_cv +0.6329 +0.6280 0 0 0 0
OVERALL_GEOMEAN -0.1442 -0.1442 0 0 0 0
```
--------------------------------------------
Here's a breakdown of each row:
**benchmark/threads:1/process_time/real_time_pvalue**: This shows the _p-value_ for
the statistical test comparing the performance of the process running with one
thread. A value of 0.0000 suggests a statistically significant difference in
performance. The comparison was conducted using the U Test (Mann-Whitney
U Test) with 27 repetitions for each case.
**benchmark/threads:1/process_time/real_time_mean**: This shows the relative
difference in mean execution time between two different cases. The negative
value (-0.1442) implies that the new process is faster by about 14.42%. The old
time was 90 units, while the new time is 77 units.
**benchmark/threads:1/process_time/real_time_median**: Similarly, this shows the
relative difference in the median execution time. Again, the new process is
faster by 14.44%.
**benchmark/threads:1/process_time/real_time_stddev**: This is the relative
difference in the standard deviation of the execution time, which is a measure
of how much variation or dispersion there is from the mean. A positive value
(+0.3974) implies there is more variance in the execution time in the new
process.
**benchmark/threads:1/process_time/real_time_cv**: CV stands for Coefficient of
Variation. It is the ratio of the standard deviation to the mean. It provides a
standardized measure of dispersion. An increase (+0.6329) indicates more
relative variability in the new process.
**OVERALL_GEOMEAN**: Geomean stands for geometric mean, a type of average that is
less influenced by outliers. The negative value indicates a general improvement
in the new process. However, given the values are all zero for the old and new
times, this seems to be a mistake or placeholder in the output.
-----------------------------------------
Let's first try to see what the different columns represent in the above
`compare.py` benchmarking output:
1. **Benchmark:** The name of the function being benchmarked, along with the
size of the input (after the slash).
2. **Time:** The average time per operation, across all iterations.
3. **CPU:** The average CPU time per operation, across all iterations.
4. **Iterations:** The number of iterations the benchmark was run to get a
stable estimate.
5. **Time Old and Time New:** These represent the average time it takes for a
function to run in two different scenarios or versions. For example, you
might be comparing how fast a function runs before and after you make some
changes to it.
6. **CPU Old and CPU New:** These show the average amount of CPU time that the
function uses in two different scenarios or versions. This is similar to
Time Old and Time New, but focuses on CPU usage instead of overall time.
In the comparison section, the relative differences in both time and CPU time
are displayed for each input size.
A statistically-significant difference is determined by a **p-value**, which is
a measure of the probability that the observed difference could have occurred
just by random chance. A smaller p-value indicates stronger evidence against the
null hypothesis.
**Therefore:**
1. If the p-value is less than the chosen significance level (alpha), we
reject the null hypothesis and conclude the benchmarks are significantly
different.
2. If the p-value is greater than or equal to alpha, we fail to reject the
null hypothesis and treat the two benchmarks as similar.
The result of said the statistical test is additionally communicated through color coding:
```diff
+ Green:
```
The benchmarks are _**statistically different**_. This could mean the
performance has either **significantly improved** or **significantly
deteriorated**. You should look at the actual performance numbers to see which
is the case.
```diff
- Red:
```
The benchmarks are _**statistically similar**_. This means the performance
**hasn't significantly changed**.
In statistical terms, **'green'** means we reject the null hypothesis that
there's no difference in performance, and **'red'** means we fail to reject the
null hypothesis. This might seem counter-intuitive if you're expecting 'green'
to mean 'improved performance' and 'red' to mean 'worsened performance'.
```bash
But remember, in this context:
'Success' means 'successfully finding a difference'.
'Failure' means 'failing to find a difference'.
```
Also, please note that **even if** we determine that there **is** a
statistically-significant difference between the two measurements, it does not
_necessarily_ mean that the actual benchmarks that were measured **are**
different, or vice versa, even if we determine that there is **no**
statistically-significant difference between the two measurements, it does not
necessarily mean that the actual benchmarks that were measured **are not**
different.
### U test
If there is a sufficient repetition count of the benchmarks, the tool can do

1405
third_party/benchmark/docs/user_guide.md vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,47 @@
#ifndef BENCHMARK_EXPORT_H
#define BENCHMARK_EXPORT_H
#if defined(_WIN32)
#define EXPORT_ATTR __declspec(dllexport)
#define IMPORT_ATTR __declspec(dllimport)
#define NO_EXPORT_ATTR
#define DEPRECATED_ATTR __declspec(deprecated)
#else // _WIN32
#define EXPORT_ATTR __attribute__((visibility("default")))
#define IMPORT_ATTR __attribute__((visibility("default")))
#define NO_EXPORT_ATTR __attribute__((visibility("hidden")))
#define DEPRECATE_ATTR __attribute__((__deprecated__))
#endif // _WIN32
#ifdef BENCHMARK_STATIC_DEFINE
#define BENCHMARK_EXPORT
#define BENCHMARK_NO_EXPORT
#else // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_EXPORT
#ifdef benchmark_EXPORTS
/* We are building this library */
#define BENCHMARK_EXPORT EXPORT_ATTR
#else // benchmark_EXPORTS
/* We are using this library */
#define BENCHMARK_EXPORT IMPORT_ATTR
#endif // benchmark_EXPORTS
#endif // !BENCHMARK_EXPORT
#ifndef BENCHMARK_NO_EXPORT
#define BENCHMARK_NO_EXPORT NO_EXPORT_ATTR
#endif // !BENCHMARK_NO_EXPORT
#endif // BENCHMARK_STATIC_DEFINE
#ifndef BENCHMARK_DEPRECATED
#define BENCHMARK_DEPRECATED DEPRECATE_ATTR
#endif // BENCHMARK_DEPRECATED
#ifndef BENCHMARK_DEPRECATED_EXPORT
#define BENCHMARK_DEPRECATED_EXPORT BENCHMARK_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#ifndef BENCHMARK_DEPRECATED_NO_EXPORT
#define BENCHMARK_DEPRECATED_NO_EXPORT BENCHMARK_NO_EXPORT BENCHMARK_DEPRECATED
#endif // BENCHMARK_DEPRECATED_EXPORT
#endif /* BENCHMARK_EXPORT_H */

View File

@@ -1,320 +0,0 @@
#! /usr/bin/env python
# encoding: utf-8
import argparse
import errno
import logging
import os
import platform
import re
import sys
import subprocess
import tempfile
try:
import winreg
except ImportError:
import _winreg as winreg
try:
import urllib.request as request
except ImportError:
import urllib as request
try:
import urllib.parse as parse
except ImportError:
import urlparse as parse
class EmptyLogger(object):
'''
Provides an implementation that performs no logging
'''
def debug(self, *k, **kw):
pass
def info(self, *k, **kw):
pass
def warn(self, *k, **kw):
pass
def error(self, *k, **kw):
pass
def critical(self, *k, **kw):
pass
def setLevel(self, *k, **kw):
pass
urls = (
'http://downloads.sourceforge.net/project/mingw-w64/Toolchains%20'
'targetting%20Win32/Personal%20Builds/mingw-builds/installer/'
'repository.txt',
'http://downloads.sourceforge.net/project/mingwbuilds/host-windows/'
'repository.txt'
)
'''
A list of mingw-build repositories
'''
def repository(urls = urls, log = EmptyLogger()):
'''
Downloads and parse mingw-build repository files and parses them
'''
log.info('getting mingw-builds repository')
versions = {}
re_sourceforge = re.compile(r'http://sourceforge.net/projects/([^/]+)/files')
re_sub = r'http://downloads.sourceforge.net/project/\1'
for url in urls:
log.debug(' - requesting: %s', url)
socket = request.urlopen(url)
repo = socket.read()
if not isinstance(repo, str):
repo = repo.decode();
socket.close()
for entry in repo.split('\n')[:-1]:
value = entry.split('|')
version = tuple([int(n) for n in value[0].strip().split('.')])
version = versions.setdefault(version, {})
arch = value[1].strip()
if arch == 'x32':
arch = 'i686'
elif arch == 'x64':
arch = 'x86_64'
arch = version.setdefault(arch, {})
threading = arch.setdefault(value[2].strip(), {})
exceptions = threading.setdefault(value[3].strip(), {})
revision = exceptions.setdefault(int(value[4].strip()[3:]),
re_sourceforge.sub(re_sub, value[5].strip()))
return versions
def find_in_path(file, path=None):
'''
Attempts to find an executable in the path
'''
if platform.system() == 'Windows':
file += '.exe'
if path is None:
path = os.environ.get('PATH', '')
if type(path) is type(''):
path = path.split(os.pathsep)
return list(filter(os.path.exists,
map(lambda dir, file=file: os.path.join(dir, file), path)))
def find_7zip(log = EmptyLogger()):
'''
Attempts to find 7zip for unpacking the mingw-build archives
'''
log.info('finding 7zip')
path = find_in_path('7z')
if not path:
key = winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\7-Zip')
path, _ = winreg.QueryValueEx(key, 'Path')
path = [os.path.join(path, '7z.exe')]
log.debug('found \'%s\'', path[0])
return path[0]
find_7zip()
def unpack(archive, location, log = EmptyLogger()):
'''
Unpacks a mingw-builds archive
'''
sevenzip = find_7zip(log)
log.info('unpacking %s', os.path.basename(archive))
cmd = [sevenzip, 'x', archive, '-o' + location, '-y']
log.debug(' - %r', cmd)
with open(os.devnull, 'w') as devnull:
subprocess.check_call(cmd, stdout = devnull)
def download(url, location, log = EmptyLogger()):
'''
Downloads and unpacks a mingw-builds archive
'''
log.info('downloading MinGW')
log.debug(' - url: %s', url)
log.debug(' - location: %s', location)
re_content = re.compile(r'attachment;[ \t]*filename=(")?([^"]*)(")?[\r\n]*')
stream = request.urlopen(url)
try:
content = stream.getheader('Content-Disposition') or ''
except AttributeError:
content = stream.headers.getheader('Content-Disposition') or ''
matches = re_content.match(content)
if matches:
filename = matches.group(2)
else:
parsed = parse.urlparse(stream.geturl())
filename = os.path.basename(parsed.path)
try:
os.makedirs(location)
except OSError as e:
if e.errno == errno.EEXIST and os.path.isdir(location):
pass
else:
raise
archive = os.path.join(location, filename)
with open(archive, 'wb') as out:
while True:
buf = stream.read(1024)
if not buf:
break
out.write(buf)
unpack(archive, location, log = log)
os.remove(archive)
possible = os.path.join(location, 'mingw64')
if not os.path.exists(possible):
possible = os.path.join(location, 'mingw32')
if not os.path.exists(possible):
raise ValueError('Failed to find unpacked MinGW: ' + possible)
return possible
def root(location = None, arch = None, version = None, threading = None,
exceptions = None, revision = None, log = EmptyLogger()):
'''
Returns the root folder of a specific version of the mingw-builds variant
of gcc. Will download the compiler if needed
'''
# Get the repository if we don't have all the information
if not (arch and version and threading and exceptions and revision):
versions = repository(log = log)
# Determine some defaults
version = version or max(versions.keys())
if not arch:
arch = platform.machine().lower()
if arch == 'x86':
arch = 'i686'
elif arch == 'amd64':
arch = 'x86_64'
if not threading:
keys = versions[version][arch].keys()
if 'posix' in keys:
threading = 'posix'
elif 'win32' in keys:
threading = 'win32'
else:
threading = keys[0]
if not exceptions:
keys = versions[version][arch][threading].keys()
if 'seh' in keys:
exceptions = 'seh'
elif 'sjlj' in keys:
exceptions = 'sjlj'
else:
exceptions = keys[0]
if revision == None:
revision = max(versions[version][arch][threading][exceptions].keys())
if not location:
location = os.path.join(tempfile.gettempdir(), 'mingw-builds')
# Get the download url
url = versions[version][arch][threading][exceptions][revision]
# Tell the user whatzzup
log.info('finding MinGW %s', '.'.join(str(v) for v in version))
log.debug(' - arch: %s', arch)
log.debug(' - threading: %s', threading)
log.debug(' - exceptions: %s', exceptions)
log.debug(' - revision: %s', revision)
log.debug(' - url: %s', url)
# Store each specific revision differently
slug = '{version}-{arch}-{threading}-{exceptions}-rev{revision}'
slug = slug.format(
version = '.'.join(str(v) for v in version),
arch = arch,
threading = threading,
exceptions = exceptions,
revision = revision
)
if arch == 'x86_64':
root_dir = os.path.join(location, slug, 'mingw64')
elif arch == 'i686':
root_dir = os.path.join(location, slug, 'mingw32')
else:
raise ValueError('Unknown MinGW arch: ' + arch)
# Download if needed
if not os.path.exists(root_dir):
downloaded = download(url, os.path.join(location, slug), log = log)
if downloaded != root_dir:
raise ValueError('The location of mingw did not match\n%s\n%s'
% (downloaded, root_dir))
return root_dir
def str2ver(string):
'''
Converts a version string into a tuple
'''
try:
version = tuple(int(v) for v in string.split('.'))
if len(version) is not 3:
raise ValueError()
except ValueError:
raise argparse.ArgumentTypeError(
'please provide a three digit version string')
return version
def main():
'''
Invoked when the script is run directly by the python interpreter
'''
parser = argparse.ArgumentParser(
description = 'Downloads a specific version of MinGW',
formatter_class = argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument('--location',
help = 'the location to download the compiler to',
default = os.path.join(tempfile.gettempdir(), 'mingw-builds'))
parser.add_argument('--arch', required = True, choices = ['i686', 'x86_64'],
help = 'the target MinGW architecture string')
parser.add_argument('--version', type = str2ver,
help = 'the version of GCC to download')
parser.add_argument('--threading', choices = ['posix', 'win32'],
help = 'the threading type of the compiler')
parser.add_argument('--exceptions', choices = ['sjlj', 'seh', 'dwarf'],
help = 'the method to throw exceptions')
parser.add_argument('--revision', type=int,
help = 'the revision of the MinGW release')
group = parser.add_mutually_exclusive_group()
group.add_argument('-v', '--verbose', action='store_true',
help='increase the script output verbosity')
group.add_argument('-q', '--quiet', action='store_true',
help='only print errors and warning')
args = parser.parse_args()
# Create the logger
logger = logging.getLogger('mingw')
handler = logging.StreamHandler()
formatter = logging.Formatter('%(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
if args.quiet:
logger.setLevel(logging.WARN)
if args.verbose:
logger.setLevel(logging.DEBUG)
# Get MinGW
root_dir = root(location = args.location, arch = args.arch,
version = args.version, threading = args.threading,
exceptions = args.exceptions, revision = args.revision,
log = logger)
sys.stdout.write('%s\n' % os.path.join(root_dir, 'bin'))
if __name__ == '__main__':
try:
main()
except IOError as e:
sys.stderr.write('IO error: %s\n' % e)
sys.exit(1)
except OSError as e:
sys.stderr.write('OS error: %s\n' % e)
sys.exit(1)
except KeyboardInterrupt as e:
sys.stderr.write('Killed\n')
sys.exit(1)

78
third_party/benchmark/pyproject.toml vendored Normal file
View File

@@ -0,0 +1,78 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project]
name = "google_benchmark"
description = "A library to benchmark code snippets."
requires-python = ">=3.10"
license = { file = "LICENSE" }
keywords = ["benchmark"]
authors = [{ name = "Google", email = "benchmark-discuss@googlegroups.com" }]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Testing",
"Topic :: System :: Benchmark",
]
dynamic = ["readme", "version"]
dependencies = ["absl-py>=0.7.1"]
[project.optional-dependencies]
dev = ["pre-commit>=3.3.3"]
[project.urls]
Homepage = "https://github.com/google/benchmark"
Documentation = "https://github.com/google/benchmark/tree/main/docs"
Repository = "https://github.com/google/benchmark.git"
Discord = "https://discord.gg/cz7UX7wKC2"
[tool.setuptools]
package-dir = { "" = "bindings/python" }
zip-safe = false
[tool.setuptools.packages.find]
where = ["bindings/python"]
[tool.setuptools.dynamic]
readme = { file = "README.md", content-type = "text/markdown" }
version = { attr = "google_benchmark.__version__" }
[tool.mypy]
check_untyped_defs = true
disallow_incomplete_defs = true
pretty = true
python_version = "3.11"
strict_optional = false
warn_unreachable = true
[[tool.mypy.overrides]]
module = ["yaml"]
ignore_missing_imports = true
[tool.ruff]
# explicitly tell ruff the source directory to correctly identify first-party package.
src = ["bindings/python"]
line-length = 80
target-version = "py311"
[tool.ruff.lint]
# Enable pycodestyle (`E`, `W`), Pyflakes (`F`), and isort (`I`) codes by default.
select = ["ASYNC", "B", "C4", "C90", "E", "F", "I", "PERF", "PIE", "PT018", "RUF", "SIM", "UP", "W"]
ignore = [
"PLW2901", # redefined-loop-name
"UP031", # printf-string-formatting
]
[tool.ruff.lint.isort]
combine-as-imports = true

View File

@@ -1,16 +0,0 @@
# How to release
* Make sure you're on master and synced to HEAD
* Ensure the project builds and tests run (sanity check only, obviously)
* `parallel -j0 exec ::: test/*_test` can help ensure everything at least
passes
* Prepare release notes
* `git log $(git describe --abbrev=0 --tags)..HEAD` gives you the list of
commits between the last annotated tag and HEAD
* Pick the most interesting.
* Create a release through github's interface
* Note this will create a lightweight tag.
* Update this to an annotated tag:
* `git pull --tags`
* `git tag -a -f <tag> <tag>`
* `git push --force origin`

166
third_party/benchmark/setup.py vendored Normal file
View File

@@ -0,0 +1,166 @@
import contextlib
import os
import platform
import re
import shutil
import sys
from collections.abc import Generator
from pathlib import Path
from typing import Any
import setuptools
from setuptools.command import build_ext
IS_WINDOWS = platform.system() == "Windows"
IS_MAC = platform.system() == "Darwin"
IS_LINUX = platform.system() == "Linux"
# hardcoded SABI-related options. Requires that each Python interpreter
# (hermetic or not) participating is of the same major-minor version.
py_limited_api = sys.version_info >= (3, 12)
options = {"bdist_wheel": {"py_limited_api": "cp312"}} if py_limited_api else {}
def is_cibuildwheel() -> bool:
return os.getenv("CIBUILDWHEEL") is not None
@contextlib.contextmanager
def _maybe_patch_toolchains() -> Generator[None, None, None]:
"""
Patch rules_python toolchains to ignore root user error
when run in a Docker container on Linux in cibuildwheel.
"""
def fmt_toolchain_args(matchobj):
suffix = "ignore_root_user_error = True"
callargs = matchobj.group(1)
# toolchain def is broken over multiple lines
if callargs.endswith("\n"):
callargs = callargs + " " + suffix + ",\n"
# toolchain def is on one line.
else:
callargs = callargs + ", " + suffix
return "python.toolchain(" + callargs + ")"
CIBW_LINUX = is_cibuildwheel() and IS_LINUX
module_bazel = Path("MODULE.bazel")
content: str = module_bazel.read_text()
try:
if CIBW_LINUX:
module_bazel.write_text(
re.sub(
r"python.toolchain\(([\w\"\s,.=]*)\)",
fmt_toolchain_args,
content,
)
)
yield
finally:
if CIBW_LINUX:
module_bazel.write_text(content)
class BazelExtension(setuptools.Extension):
"""A C/C++ extension that is defined as a Bazel BUILD target."""
def __init__(self, name: str, bazel_target: str, **kwargs: Any):
super().__init__(name=name, sources=[], **kwargs)
self.bazel_target = bazel_target
stripped_target = bazel_target.split("//")[-1]
self.relpath, self.target_name = stripped_target.split(":")
class BuildBazelExtension(build_ext.build_ext):
"""A command that runs Bazel to build a C/C++ extension."""
def run(self):
for ext in self.extensions:
self.bazel_build(ext)
# explicitly call `bazel shutdown` for graceful exit
self.spawn(["bazel", "shutdown"])
def copy_extensions_to_source(self):
"""
Copy generated extensions into the source tree.
This is done in the ``bazel_build`` method, so it's not necessary to
do again in the `build_ext` base class.
"""
def bazel_build(self, ext: BazelExtension) -> None: # noqa: C901
"""Runs the bazel build to create the package."""
temp_path = Path(self.build_temp)
# We round to the minor version, which makes rules_python
# look up the latest available patch version internally.
python_version = "{}.{}".format(*sys.version_info[:2])
bazel_argv = [
"bazel",
"run",
ext.bazel_target,
f"--symlink_prefix={temp_path / 'bazel-'}",
f"--compilation_mode={'dbg' if self.debug else 'opt'}",
# C++17 is required by nanobind
f"--cxxopt={'/std:c++17' if IS_WINDOWS else '-std=c++17'}",
f"--@rules_python//python/config_settings:python_version={python_version}",
]
if ext.py_limited_api:
bazel_argv += ["--@nanobind_bazel//:py-limited-api=cp312"]
if IS_WINDOWS:
# Link with python*.lib.
for library_dir in self.library_dirs:
bazel_argv.append("--linkopt=/LIBPATH:" + library_dir)
elif IS_MAC:
# C++17 needs macOS 10.14 at minimum
bazel_argv.append("--macos_minimum_os=10.14")
with _maybe_patch_toolchains():
self.spawn(bazel_argv)
if IS_WINDOWS:
suffix = ".pyd"
else:
suffix = ".abi3.so" if ext.py_limited_api else ".so"
# copy the Bazel build artifacts into setuptools' libdir,
# from where the wheel is built.
pkgname = "google_benchmark"
pythonroot = Path("bindings") / "python" / "google_benchmark"
srcdir = temp_path / "bazel-bin" / pythonroot
libdir = Path(self.build_lib) / pkgname
for root, dirs, files in os.walk(srcdir, topdown=True):
# exclude runfiles directories and children.
dirs[:] = [d for d in dirs if "runfiles" not in d]
for f in files:
fp = Path(f)
should_copy = False
# we do not want the bare .so file included
# when building for ABI3, so we require a
# full and exact match on the file extension.
if "".join(fp.suffixes) == suffix or fp.suffix == ".pyi":
should_copy = True
elif Path(root) == srcdir and f == "py.typed":
# copy py.typed, but only at the package root.
should_copy = True
if should_copy:
shutil.copyfile(root / fp, libdir / fp)
setuptools.setup(
cmdclass={"build_ext": BuildBazelExtension},
package_data={"google_benchmark": ["py.typed", "*.pyi"]},
ext_modules=[
BazelExtension(
name="google_benchmark._benchmark",
bazel_target="//bindings/python/google_benchmark:benchmark_stubgen",
py_limited_api=py_limited_api,
)
],
options=options,
)

View File

@@ -1,4 +1,5 @@
# Allow the source files to find headers in src/
#Allow the source files to find headers in src /
include(GNUInstallDirs)
include_directories(${PROJECT_SOURCE_DIR}/src)
if (DEFINED BENCHMARK_CXX_LINKER_FLAGS)
@@ -17,92 +18,166 @@ foreach(item ${BENCHMARK_MAIN})
endforeach()
add_library(benchmark ${SOURCE_FILES})
add_library(benchmark::benchmark ALIAS benchmark)
set_target_properties(benchmark PROPERTIES
OUTPUT_NAME "benchmark"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
)
set_property(
SOURCE benchmark.cc
APPEND
PROPERTY COMPILE_DEFINITIONS
BENCHMARK_VERSION="${VERSION}"
)
# libpfm, if available
if (PFM_FOUND)
target_link_libraries(benchmark PRIVATE PFM::libpfm)
target_compile_definitions(benchmark PRIVATE -DHAVE_LIBPFM)
install(
FILES "${PROJECT_SOURCE_DIR}/cmake/Modules/FindPFM.cmake"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
# pthread affinity, if available
if(HAVE_PTHREAD_AFFINITY)
target_compile_definitions(benchmark PRIVATE -DBENCHMARK_HAS_PTHREAD_AFFINITY)
endif()
# Link threads.
target_link_libraries(benchmark ${BENCHMARK_CXX_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
find_library(LIBRT rt)
if(LIBRT)
target_link_libraries(benchmark ${LIBRT})
endif()
target_link_libraries(benchmark PRIVATE Threads::Threads)
target_link_libraries(benchmark PRIVATE ${BENCHMARK_CXX_LIBRARIES})
if(HAVE_LIB_RT)
target_link_libraries(benchmark PRIVATE rt)
endif(HAVE_LIB_RT)
# We need extra libraries on Windows
if(${CMAKE_SYSTEM_NAME} MATCHES "Windows")
target_link_libraries(benchmark Shlwapi)
target_link_libraries(benchmark PRIVATE shlwapi)
endif()
# We need extra libraries on Solaris
if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
target_link_libraries(benchmark kstat)
target_link_libraries(benchmark PRIVATE kstat)
set(BENCHMARK_PRIVATE_LINK_LIBRARIES -lkstat)
endif()
if (NOT BUILD_SHARED_LIBS)
target_compile_definitions(benchmark PUBLIC -DBENCHMARK_STATIC_DEFINE)
endif()
# Benchmark main library
add_library(benchmark_main "benchmark_main.cc")
add_library(benchmark::benchmark_main ALIAS benchmark_main)
set_target_properties(benchmark_main PROPERTIES
OUTPUT_NAME "benchmark_main"
VERSION ${GENERIC_LIB_VERSION}
SOVERSION ${GENERIC_LIB_SOVERSION}
DEFINE_SYMBOL benchmark_EXPORTS
)
target_include_directories(benchmark PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
)
target_link_libraries(benchmark_main benchmark)
target_link_libraries(benchmark_main PUBLIC benchmark::benchmark)
set(include_install_dir "include")
set(lib_install_dir "lib/")
set(bin_install_dir "bin/")
set(config_install_dir "lib/cmake/${PROJECT_NAME}")
set(pkgconfig_install_dir "lib/pkgconfig")
set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated")
set(generated_dir "${PROJECT_BINARY_DIR}")
set(version_config "${generated_dir}/${PROJECT_NAME}ConfigVersion.cmake")
set(project_config "${generated_dir}/${PROJECT_NAME}Config.cmake")
set(pkg_config "${generated_dir}/${PROJECT_NAME}.pc")
set(pkg_config_main "${generated_dir}/${PROJECT_NAME}_main.pc")
set(targets_to_export benchmark benchmark_main)
set(targets_export_name "${PROJECT_NAME}Targets")
set(namespace "${PROJECT_NAME}::")
include(CMakePackageConfigHelpers)
configure_package_config_file (
${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in
${project_config}
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
NO_SET_AND_CHECK_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
write_basic_package_version_file(
"${version_config}" VERSION ${GENERIC_LIB_VERSION} COMPATIBILITY SameMajorVersion
)
configure_file("${PROJECT_SOURCE_DIR}/cmake/Config.cmake.in" "${project_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark.pc.in" "${pkg_config}" @ONLY)
configure_file("${PROJECT_SOURCE_DIR}/cmake/benchmark_main.pc.in" "${pkg_config_main}" @ONLY)
export (
TARGETS ${targets_to_export}
NAMESPACE "${namespace}"
FILE ${generated_dir}/${targets_export_name}.cmake
)
if (BENCHMARK_ENABLE_INSTALL)
# Install target (will install the library to specified CMAKE_INSTALL_PREFIX variable)
install(
TARGETS benchmark benchmark_main
TARGETS ${targets_to_export}
EXPORT ${targets_export_name}
ARCHIVE DESTINATION ${lib_install_dir}
LIBRARY DESTINATION ${lib_install_dir}
RUNTIME DESTINATION ${bin_install_dir}
INCLUDES DESTINATION ${include_install_dir})
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/include/benchmark"
DESTINATION ${include_install_dir}
"${PROJECT_BINARY_DIR}/include/benchmark"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.*h")
install(
FILES "${project_config}" "${version_config}"
DESTINATION "${config_install_dir}")
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
install(
FILES "${pkg_config}"
DESTINATION "${pkgconfig_install_dir}")
FILES "${pkg_config}" "${pkg_config_main}"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
install(
EXPORT "${targets_export_name}"
NAMESPACE "${namespace}"
DESTINATION "${config_install_dir}")
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
endif()
if (BENCHMARK_ENABLE_DOXYGEN)
find_package(Doxygen REQUIRED)
set(DOXYGEN_QUIET YES)
set(DOXYGEN_RECURSIVE YES)
set(DOXYGEN_GENERATE_HTML YES)
set(DOXYGEN_GENERATE_MAN NO)
set(DOXYGEN_MARKDOWN_SUPPORT YES)
set(DOXYGEN_BUILTIN_STL_SUPPORT YES)
set(DOXYGEN_EXTRACT_PACKAGE YES)
set(DOXYGEN_EXTRACT_STATIC YES)
set(DOXYGEN_SHOW_INCLUDE_FILES YES)
set(DOXYGEN_BINARY_TOC YES)
set(DOXYGEN_TOC_EXPAND YES)
set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "index.md")
doxygen_add_docs(benchmark_doxygen
docs
include
src
ALL
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "Building documentation with Doxygen.")
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/html/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
else()
if (BENCHMARK_ENABLE_INSTALL AND BENCHMARK_INSTALL_DOCS)
install(
DIRECTORY "${PROJECT_SOURCE_DIR}/docs/"
DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()
endif()

File diff suppressed because it is too large Load Diff

View File

@@ -1,15 +1,118 @@
#include "benchmark_api_internal.h"
#include <cinttypes>
#include "string_util.h"
namespace benchmark {
namespace internal {
BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args,
int thread_count)
: benchmark_(*benchmark),
family_index_(family_idx),
per_family_instance_index_(per_family_instance_idx),
aggregation_report_mode_(benchmark_.aggregation_report_mode_),
args_(args),
time_unit_(benchmark_.GetTimeUnit()),
measure_process_cpu_time_(benchmark_.measure_process_cpu_time_),
use_real_time_(benchmark_.use_real_time_),
use_manual_time_(benchmark_.use_manual_time_),
complexity_(benchmark_.complexity_),
complexity_lambda_(benchmark_.complexity_lambda_),
statistics_(benchmark_.statistics_),
repetitions_(benchmark_.repetitions_),
min_time_(benchmark_.min_time_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count),
setup_(benchmark_.setup_),
teardown_(benchmark_.teardown_) {
name_.function_name = benchmark_.name_;
size_t arg_i = 0;
for (const auto& arg : args) {
if (!name_.args.empty()) {
name_.args += '/';
}
if (arg_i < benchmark->arg_names_.size()) {
const auto& arg_name = benchmark_.arg_names_[arg_i];
if (!arg_name.empty()) {
name_.args += StrFormat("%s:", arg_name.c_str());
}
}
name_.args += StrFormat("%" PRId64, arg);
++arg_i;
}
if (!IsZero(benchmark->min_time_)) {
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
}
if (!IsZero(benchmark->min_warmup_time_)) {
name_.min_warmup_time =
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
}
if (benchmark_.iterations_ != 0) {
name_.iterations = StrFormat(
"iterations:%lu", static_cast<unsigned long>(benchmark_.iterations_));
}
if (benchmark_.repetitions_ != 0) {
name_.repetitions = StrFormat("repeats:%d", benchmark_.repetitions_);
}
if (benchmark_.measure_process_cpu_time_) {
name_.time_type = "process_time";
}
if (benchmark_.use_manual_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "manual_time";
} else if (benchmark_.use_real_time_) {
if (!name_.time_type.empty()) {
name_.time_type += '/';
}
name_.time_type += "real_time";
}
if (!benchmark_.thread_counts_.empty()) {
name_.threads = StrFormat("threads:%d", threads_);
}
}
State BenchmarkInstance::Run(
size_t iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager) const {
State st(iters, arg, thread_id, threads, timer, manager);
benchmark->Run(st);
IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const {
State st(name_.function_name, iters, args_, thread_id, threads_, timer,
manager, perf_counters_measurement, profiler_manager);
benchmark_.Run(st);
return st;
}
} // internal
} // benchmark
void BenchmarkInstance::Setup() const {
if (setup_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
setup_(st);
}
}
void BenchmarkInstance::Teardown() const {
if (teardown_ != nullptr) {
State st(name_.function_name, /*iters*/ 1, args_, /*thread_id*/ 0, threads_,
nullptr, nullptr, nullptr, nullptr);
teardown_(st);
}
}
} // namespace internal
} // namespace benchmark

View File

@@ -1,9 +1,6 @@
#ifndef BENCHMARK_API_INTERNAL_H
#define BENCHMARK_API_INTERNAL_H
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
#include <cmath>
#include <iosfwd>
#include <limits>
@@ -11,31 +8,71 @@
#include <string>
#include <vector>
#include "benchmark/benchmark.h"
#include "commandlineflags.h"
namespace benchmark {
namespace internal {
// Information kept per benchmark we may want to run
struct BenchmarkInstance {
std::string name;
Benchmark* benchmark;
AggregationReportMode aggregation_report_mode;
std::vector<int64_t> arg;
TimeUnit time_unit;
int range_multiplier;
bool use_real_time;
bool use_manual_time;
BigO complexity;
BigOFunc* complexity_lambda;
UserCounters counters;
const std::vector<Statistics>* statistics;
bool last_benchmark_instance;
int repetitions;
double min_time;
size_t iterations;
int threads; // Number of concurrent threads to us
class BenchmarkInstance {
public:
BenchmarkInstance(Benchmark* benchmark, int family_idx,
int per_family_instance_idx,
const std::vector<int64_t>& args, int thread_count);
State Run(size_t iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager) const;
const BenchmarkName& name() const { return name_; }
int family_index() const { return family_index_; }
int per_family_instance_index() const { return per_family_instance_index_; }
AggregationReportMode aggregation_report_mode() const {
return aggregation_report_mode_;
}
TimeUnit time_unit() const { return time_unit_; }
bool measure_process_cpu_time() const { return measure_process_cpu_time_; }
bool use_real_time() const { return use_real_time_; }
bool use_manual_time() const { return use_manual_time_; }
BigO complexity() const { return complexity_; }
BigOFunc* complexity_lambda() const { return complexity_lambda_; }
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
void Setup() const;
void Teardown() const;
const auto& GetUserThreadRunnerFactory() const {
return benchmark_.threadrunner_;
}
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager) const;
private:
BenchmarkName name_;
Benchmark& benchmark_;
const int family_index_;
const int per_family_instance_index_;
AggregationReportMode aggregation_report_mode_;
const std::vector<int64_t>& args_;
TimeUnit time_unit_;
bool measure_process_cpu_time_;
bool use_real_time_;
bool use_manual_time_;
BigO complexity_;
BigOFunc* complexity_lambda_;
UserCounters counters_;
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to us
callback_function setup_;
callback_function teardown_;
};
bool FindBenchmarksInternal(const std::string& re,
@@ -44,6 +81,7 @@ bool FindBenchmarksInternal(const std::string& re,
bool IsZero(double n);
BENCHMARK_EXPORT
ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false);
} // end namespace internal

View File

@@ -14,4 +14,5 @@
#include "benchmark/benchmark.h"
BENCHMARK_EXPORT int main(int /*argc*/, char** /*argv*/);
BENCHMARK_MAIN();

View File

@@ -0,0 +1,59 @@
// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <benchmark/benchmark.h>
namespace benchmark {
namespace {
// Compute the total size of a pack of std::strings
size_t size_impl() { return 0; }
template <typename Head, typename... Tail>
size_t size_impl(const Head& head, const Tail&... tail) {
return head.size() + size_impl(tail...);
}
// Join a pack of std::strings using a delimiter
// TODO(dominic): use absl::StrJoin
void join_impl(std::string& /*unused*/, char /*unused*/) {}
template <typename Head, typename... Tail>
void join_impl(std::string& s, const char delimiter, const Head& head,
const Tail&... tail) {
if (!s.empty() && !head.empty()) {
s += delimiter;
}
s += head;
join_impl(s, delimiter, tail...);
}
template <typename... Ts>
std::string join(char delimiter, const Ts&... ts) {
std::string s;
s.reserve(sizeof...(Ts) + size_impl(ts...));
join_impl(s, delimiter, ts...);
return s;
}
} // namespace
BENCHMARK_EXPORT
std::string BenchmarkName::str() const {
return join('/', function_name, args, min_time, min_warmup_time, iterations,
repetitions, time_type, threads);
}
} // namespace benchmark

View File

@@ -15,7 +15,7 @@
#include "benchmark_register.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -24,6 +24,7 @@
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
@@ -31,6 +32,7 @@
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <thread>
@@ -51,10 +53,13 @@ namespace benchmark {
namespace {
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;
constexpr int kRangeMultiplier = 8;
// The size of a benchmark family determines is the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
static const size_t kMaxFamilySize = 100;
constexpr size_t kMaxFamilySize = 100;
constexpr char kDisabledPrefix[] = "DISABLED_";
} // end namespace
namespace internal {
@@ -77,7 +82,7 @@ class BenchmarkFamilies {
// Extract the list of benchmark instances that match the specified
// regular expression.
bool FindBenchmarks(std::string re,
bool FindBenchmarks(std::string spec,
std::vector<BenchmarkInstance>* benchmarks,
std::ostream* Err);
@@ -109,28 +114,35 @@ void BenchmarkFamilies::ClearBenchmarks() {
bool BenchmarkFamilies::FindBenchmarks(
std::string spec, std::vector<BenchmarkInstance>* benchmarks,
std::ostream* ErrStream) {
CHECK(ErrStream);
BM_CHECK(ErrStream);
auto& Err = *ErrStream;
// Make regular expression out of command-line flag
std::string error_msg;
Regex re;
bool isNegativeFilter = false;
bool is_negative_filter = false;
if (spec[0] == '-') {
spec.replace(0, 1, "");
isNegativeFilter = true;
is_negative_filter = true;
}
if (!re.Init(spec, &error_msg)) {
Err << "Could not compile benchmark re: " << error_msg << std::endl;
Err << "Could not compile benchmark re: " << error_msg << '\n';
return false;
}
// Special list of thread counts to use when none are specified
const std::vector<int> one_thread = {1};
int next_family_index = 0;
MutexLock l(mutex_);
for (std::unique_ptr<Benchmark>& family : families_) {
int family_index = next_family_index;
int per_family_instance_index = 0;
// Family was deleted or benchmark doesn't match
if (!family) continue;
if (!family) {
continue;
}
if (family->ArgsCnt() == -1) {
family->Args({});
@@ -147,67 +159,31 @@ bool BenchmarkFamilies::FindBenchmarks(
<< " will be repeated at least " << family_size << " times.\n";
}
// reserve in the special case the regex ".", since we know the final
// family size.
if (spec == ".") benchmarks->reserve(family_size);
// family size. this doesn't take into account any disabled benchmarks
// so worst case we reserve more than we need.
if (spec == ".") {
benchmarks->reserve(benchmarks->size() + family_size);
}
for (auto const& args : family->args_) {
for (int num_threads : *thread_counts) {
BenchmarkInstance instance;
instance.name = family->name_;
instance.benchmark = family.get();
instance.aggregation_report_mode = family->aggregation_report_mode_;
instance.arg = args;
instance.time_unit = family->time_unit_;
instance.range_multiplier = family->range_multiplier_;
instance.min_time = family->min_time_;
instance.iterations = family->iterations_;
instance.repetitions = family->repetitions_;
instance.use_real_time = family->use_real_time_;
instance.use_manual_time = family->use_manual_time_;
instance.complexity = family->complexity_;
instance.complexity_lambda = family->complexity_lambda_;
instance.statistics = &family->statistics_;
instance.threads = num_threads;
BenchmarkInstance instance(family.get(), family_index,
per_family_instance_index, args,
num_threads);
// Add arguments to instance name
size_t arg_i = 0;
for (auto const& arg : args) {
instance.name += "/";
if (arg_i < family->arg_names_.size()) {
const auto& arg_name = family->arg_names_[arg_i];
if (!arg_name.empty()) {
instance.name +=
StrFormat("%s:", family->arg_names_[arg_i].c_str());
}
}
instance.name += StrFormat("%d", arg);
++arg_i;
}
if (!IsZero(family->min_time_))
instance.name += StrFormat("/min_time:%0.3f", family->min_time_);
if (family->iterations_ != 0)
instance.name += StrFormat("/iterations:%d", family->iterations_);
if (family->repetitions_ != 0)
instance.name += StrFormat("/repeats:%d", family->repetitions_);
if (family->use_manual_time_) {
instance.name += "/manual_time";
} else if (family->use_real_time_) {
instance.name += "/real_time";
}
// Add the number of threads used to the name
if (!family->thread_counts_.empty()) {
instance.name += StrFormat("/threads:%d", instance.threads);
}
if ((re.Match(instance.name) && !isNegativeFilter) ||
(!re.Match(instance.name) && isNegativeFilter)) {
instance.last_benchmark_instance = (&args == &family->args_.back());
const auto full_name = instance.name().str();
if (full_name.rfind(kDisabledPrefix, 0) != 0 &&
((re.Match(full_name) && !is_negative_filter) ||
(!re.Match(full_name) && is_negative_filter))) {
benchmarks->push_back(std::move(instance));
++per_family_instance_index;
// Only bump the next family index once we've estabilished that
// at least one instance of this family will be run.
if (next_family_index == family_index) {
++next_family_index;
}
}
}
}
@@ -215,11 +191,11 @@ bool BenchmarkFamilies::FindBenchmarks(
return true;
}
Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
std::unique_ptr<Benchmark> bench_ptr(bench);
Benchmark* RegisterBenchmarkInternal(std::unique_ptr<Benchmark> bench) {
Benchmark* bench_ptr = bench.get();
BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
families->AddBenchmark(std::move(bench_ptr));
return bench;
families->AddBenchmark(std::move(bench));
return bench_ptr;
}
// FIXME: This function is a hack so that benchmark.cc can access
@@ -234,14 +210,17 @@ bool FindBenchmarksInternal(const std::string& re,
// Benchmark
//=============================================================================//
Benchmark::Benchmark(const char* name)
Benchmark::Benchmark(const std::string& name)
: name_(name),
aggregation_report_mode_(ARM_Unspecified),
time_unit_(kNanosecond),
time_unit_(GetDefaultTimeUnit()),
use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
min_warmup_time_(0),
iterations_(0),
repetitions_(0),
measure_process_cpu_time_(false),
use_real_time_(false),
use_manual_time_(false),
complexity_(oNone),
@@ -249,23 +228,30 @@ Benchmark::Benchmark(const char* name)
ComputeStatistics("mean", StatisticsMean);
ComputeStatistics("median", StatisticsMedian);
ComputeStatistics("stddev", StatisticsStdDev);
ComputeStatistics("cv", StatisticsCV, kPercentage);
}
Benchmark::~Benchmark() {}
Benchmark* Benchmark::Name(const std::string& name) {
SetName(name);
return this;
}
Benchmark* Benchmark::Arg(int64_t x) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
args_.push_back({x});
return this;
}
Benchmark* Benchmark::Unit(TimeUnit unit) {
time_unit_ = unit;
use_default_time_unit_ = false;
return this;
}
Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
std::vector<int64_t> arglist;
AddRange(&arglist, start, limit, range_multiplier_);
@@ -277,54 +263,61 @@ Benchmark* Benchmark::Range(int64_t start, int64_t limit) {
Benchmark* Benchmark::Ranges(
const std::vector<std::pair<int64_t, int64_t>>& ranges) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size()));
std::vector<std::vector<int64_t>> arglists(ranges.size());
std::size_t total = 1;
for (std::size_t i = 0; i < ranges.size(); i++) {
AddRange(&arglists[i], ranges[i].first, ranges[i].second,
range_multiplier_);
total *= arglists[i].size();
}
std::vector<std::size_t> ctr(arglists.size(), 0);
ArgsProduct(arglists);
return this;
}
Benchmark* Benchmark::ArgsProduct(
const std::vector<std::vector<int64_t>>& arglists) {
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size()));
std::vector<std::size_t> indices(arglists.size());
const std::size_t total = std::accumulate(
std::begin(arglists), std::end(arglists), std::size_t{1},
[](const std::size_t res, const std::vector<int64_t>& arglist) {
return res * arglist.size();
});
std::vector<int64_t> args;
args.reserve(arglists.size());
for (std::size_t i = 0; i < total; i++) {
std::vector<int64_t> tmp;
tmp.reserve(arglists.size());
for (std::size_t j = 0; j < arglists.size(); j++) {
tmp.push_back(arglists[j].at(ctr[j]));
for (std::size_t arg = 0; arg < arglists.size(); arg++) {
args.push_back(arglists[arg][indices[arg]]);
}
args_.push_back(args);
args.clear();
args_.push_back(std::move(tmp));
for (std::size_t j = 0; j < arglists.size(); j++) {
if (ctr[j] + 1 < arglists[j].size()) {
++ctr[j];
break;
}
ctr[j] = 0;
}
std::size_t arg = 0;
do {
indices[arg] = (indices[arg] + 1) % arglists[arg].size();
} while (indices[arg++] == 0 && arg < arglists.size());
}
return this;
}
Benchmark* Benchmark::ArgName(const std::string& name) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
arg_names_ = {name};
return this;
}
Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size()));
arg_names_ = names;
return this;
}
Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
CHECK_GE(start, 0);
CHECK_LE(start, limit);
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
BM_CHECK_LE(start, limit);
for (int64_t arg = start; arg <= limit; arg += step) {
args_.push_back({arg});
}
@@ -332,7 +325,7 @@ Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) {
}
Benchmark* Benchmark::Args(const std::vector<int64_t>& args) {
CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size()));
args_.push_back(args);
return this;
}
@@ -342,28 +335,60 @@ Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
return this;
}
Benchmark* Benchmark::Setup(callback_function&& setup) {
BM_CHECK(setup != nullptr);
setup_ = std::forward<callback_function>(setup);
return this;
}
Benchmark* Benchmark::Setup(const callback_function& setup) {
BM_CHECK(setup != nullptr);
setup_ = setup;
return this;
}
Benchmark* Benchmark::Teardown(callback_function&& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = std::forward<callback_function>(teardown);
return this;
}
Benchmark* Benchmark::Teardown(const callback_function& teardown) {
BM_CHECK(teardown != nullptr);
teardown_ = teardown;
return this;
}
Benchmark* Benchmark::RangeMultiplier(int multiplier) {
CHECK(multiplier > 1);
BM_CHECK(multiplier > 1);
range_multiplier_ = multiplier;
return this;
}
Benchmark* Benchmark::MinTime(double t) {
CHECK(t > 0.0);
CHECK(iterations_ == 0);
BM_CHECK(t > 0.0);
BM_CHECK(iterations_ == 0);
min_time_ = t;
return this;
}
Benchmark* Benchmark::Iterations(size_t n) {
CHECK(n > 0);
CHECK(IsZero(min_time_));
Benchmark* Benchmark::MinWarmUpTime(double t) {
BM_CHECK(t >= 0.0);
BM_CHECK(iterations_ == 0);
min_warmup_time_ = t;
return this;
}
Benchmark* Benchmark::Iterations(IterationCount n) {
BM_CHECK(n > 0);
BM_CHECK(IsZero(min_time_));
BM_CHECK(IsZero(min_warmup_time_));
iterations_ = n;
return this;
}
Benchmark* Benchmark::Repetitions(int n) {
CHECK(n > 0);
BM_CHECK(n > 0);
repetitions_ = n;
return this;
}
@@ -389,15 +414,21 @@ Benchmark* Benchmark::DisplayAggregatesOnly(bool value) {
return this;
}
Benchmark* Benchmark::MeasureProcessCPUTime() {
// Can be used together with UseRealTime() / UseManualTime().
measure_process_cpu_time_ = true;
return this;
}
Benchmark* Benchmark::UseRealTime() {
CHECK(!use_manual_time_)
BM_CHECK(!use_manual_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_real_time_ = true;
return this;
}
Benchmark* Benchmark::UseManualTime() {
CHECK(!use_real_time_)
BM_CHECK(!use_real_time_)
<< "Cannot set UseRealTime and UseManualTime simultaneously.";
use_manual_time_ = true;
return this;
@@ -414,21 +445,22 @@ Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
return this;
}
Benchmark* Benchmark::ComputeStatistics(std::string name,
StatisticsFunc* statistics) {
statistics_.emplace_back(name, statistics);
Benchmark* Benchmark::ComputeStatistics(const std::string& name,
StatisticsFunc* statistics,
StatisticUnit unit) {
statistics_.emplace_back(name, statistics, unit);
return this;
}
Benchmark* Benchmark::Threads(int t) {
CHECK_GT(t, 0);
BM_CHECK_GT(t, 0);
thread_counts_.push_back(t);
return this;
}
Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
AddRange(&thread_counts_, min_threads, max_threads, 2);
return this;
@@ -436,9 +468,9 @@ Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads,
int stride) {
CHECK_GT(min_threads, 0);
CHECK_GE(max_threads, min_threads);
CHECK_GE(stride, 1);
BM_CHECK_GT(min_threads, 0);
BM_CHECK_GE(max_threads, min_threads);
BM_CHECK_GE(stride, 1);
for (auto i = min_threads; i < max_threads; i += stride) {
thread_counts_.push_back(i);
@@ -452,16 +484,36 @@ Benchmark* Benchmark::ThreadPerCpu() {
return this;
}
void Benchmark::SetName(const char* name) { name_ = name; }
Benchmark* Benchmark::ThreadRunner(threadrunner_factory&& factory) {
threadrunner_ = std::move(factory);
return this;
}
void Benchmark::SetName(const std::string& name) { name_ = name; }
const char* Benchmark::GetName() const { return name_.c_str(); }
int Benchmark::ArgsCnt() const {
if (args_.empty()) {
if (arg_names_.empty()) return -1;
if (arg_names_.empty()) {
return -1;
}
return static_cast<int>(arg_names_.size());
}
return static_cast<int>(args_.front().size());
}
const char* Benchmark::GetArgName(int arg) const {
BM_CHECK_GE(arg, 0);
size_t uarg = static_cast<size_t>(arg);
BM_CHECK_LT(uarg, arg_names_.size());
return arg_names_[uarg].c_str();
}
TimeUnit Benchmark::GetTimeUnit() const {
return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_;
}
//=============================================================================//
// FunctionBenchmark
//=============================================================================//
@@ -474,4 +526,19 @@ void ClearRegisteredBenchmarks() {
internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks();
}
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) {
std::vector<int64_t> args;
internal::AddRange(&args, lo, hi, multi);
return args;
}
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) {
BM_CHECK_LE(start, limit);
std::vector<int64_t> args;
for (int64_t arg = start; arg <= limit; arg += step) {
args.push_back(arg);
}
return args;
}
} // end namespace benchmark

View File

@@ -1,33 +1,109 @@
#ifndef BENCHMARK_REGISTER_H
#define BENCHMARK_REGISTER_H
#include <algorithm>
#include <limits>
#include <vector>
#include "check.h"
namespace benchmark {
namespace internal {
// Append the powers of 'mult' in the closed interval [lo, hi].
// Returns iterator to the start of the inserted range.
template <typename T>
typename std::vector<T>::iterator AddPowers(std::vector<T>* dst, T lo, T hi,
int mult) {
BM_CHECK_GE(lo, 0);
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
const size_t start_offset = dst->size();
static const T kmax = std::numeric_limits<T>::max();
// Space out the values in multiples of "mult"
for (T i = static_cast<T>(1); i <= hi; i = static_cast<T>(i * mult)) {
if (i >= lo) {
dst->push_back(i);
}
// Break the loop here since multiplying by
// 'mult' would move outside of the range of T
if (i > kmax / mult) break;
}
return dst->begin() + static_cast<int>(start_offset);
}
template <typename T>
void AddNegatedPowers(std::vector<T>* dst, T lo, T hi, int mult) {
// We negate lo and hi so we require that they cannot be equal to 'min'.
BM_CHECK_GT(lo, std::numeric_limits<T>::min());
BM_CHECK_GT(hi, std::numeric_limits<T>::min());
BM_CHECK_GE(hi, lo);
BM_CHECK_LE(hi, 0);
// Add positive powers, then negate and reverse.
// Casts necessary since small integers get promoted
// to 'int' when negating.
const auto lo_complement = static_cast<T>(-lo);
const auto hi_complement = static_cast<T>(-hi);
const auto it = AddPowers(dst, hi_complement, lo_complement, mult);
std::for_each(it, dst->end(), [](T& t) { t = static_cast<T>(t * -1); });
std::reverse(it, dst->end());
}
template <typename T>
void AddRange(std::vector<T>* dst, T lo, T hi, int mult) {
CHECK_GE(lo, 0);
CHECK_GE(hi, lo);
CHECK_GE(mult, 2);
static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
"Args type must be a signed integer");
BM_CHECK_GE(hi, lo);
BM_CHECK_GE(mult, 2);
// Add "lo"
dst->push_back(lo);
static const T kmax = std::numeric_limits<T>::max();
// Handle lo == hi as a special case, so we then know
// lo < hi and so it is safe to add 1 to lo and subtract 1
// from hi without falling outside of the range of T.
if (lo == hi) return;
// Now space out the benchmarks in multiples of "mult"
for (T i = 1; i < kmax / mult; i *= mult) {
if (i >= hi) break;
if (i > lo) {
dst->push_back(i);
}
// Ensure that lo_inner <= hi_inner below.
if (lo + 1 == hi) {
dst->push_back(hi);
return;
}
// Add "hi" (if different from "lo")
if (hi != lo) {
// Add all powers of 'mult' in the range [lo+1, hi-1] (inclusive).
const auto lo_inner = static_cast<T>(lo + 1);
const auto hi_inner = static_cast<T>(hi - 1);
// Insert negative values
if (lo_inner < 0) {
AddNegatedPowers(dst, lo_inner, std::min(hi_inner, T{-1}), mult);
}
// Treat 0 as a special case (see discussion on #762).
if (lo < 0 && hi >= 0) {
dst->push_back(0);
}
// Insert positive values
if (hi_inner > 0) {
AddPowers(dst, std::max(lo_inner, T{1}), hi_inner, mult);
}
// Add "hi" (if different from last value).
if (hi != dst->back()) {
dst->push_back(hi);
}
}
} // namespace internal
} // namespace benchmark
#endif // BENCHMARK_REGISTER_H

View File

@@ -13,12 +13,13 @@
// limitations under the License.
#include "benchmark_runner.h"
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT)
#include <sys/resource.h>
#endif
#include <sys/time.h>
@@ -27,11 +28,15 @@
#include <algorithm>
#include <atomic>
#include <climits>
#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <thread>
@@ -42,9 +47,9 @@
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
@@ -53,64 +58,95 @@
namespace benchmark {
BM_DECLARE_bool(benchmark_dry_run);
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
BM_DECLARE_bool(benchmark_display_aggregates_only);
BM_DECLARE_string(benchmark_perf_counters);
namespace internal {
MemoryManager* memory_manager = nullptr;
ProfilerManager* profiler_manager = nullptr;
namespace {
static const size_t kMaxIterations = 1000000000;
constexpr IterationCount kMaxIterations = 1000000000000;
const double kDefaultMinTime =
std::strtod(::benchmark::kDefaultMinTimeStr, /*p_end*/ nullptr);
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results, size_t memory_iterations,
const MemoryManager::Result& memory_result, double seconds) {
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
const MemoryManager::Result& memory_result, double seconds,
int64_t repetition_index, int64_t repeats) {
// Create report about this benchmark run.
BenchmarkReporter::Run report;
report.run_name = b.name;
report.error_occurred = results.has_error_;
report.error_message = results.error_message_;
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
report.skipped = results.skipped_;
report.skip_message = results.skip_message_;
report.report_label = results.report_label_;
// This is the total iterations across all threads.
report.iterations = results.iterations;
report.time_unit = b.time_unit;
report.time_unit = b.time_unit();
report.threads = b.threads();
report.repetition_index = repetition_index;
report.repetitions = repeats;
if (!report.error_occurred) {
if (b.use_manual_time) {
if (report.skipped == 0u) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.use_real_time_for_initial_big_o = b.use_manual_time();
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity;
report.complexity_lambda = b.complexity_lambda;
report.statistics = b.statistics;
report.complexity = b.complexity();
report.complexity_lambda = b.complexity_lambda();
report.statistics = &b.statistics();
report.counters = results.counters;
if (memory_iterations > 0) {
report.has_memory_result = true;
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations ? static_cast<double>(memory_result.num_allocs) /
memory_iterations
: 0;
report.max_bytes_used = memory_result.max_bytes_used;
memory_iterations != 0
? static_cast<double>(memory_result.num_allocs) /
static_cast<double>(memory_iterations)
: 0;
}
internal::Finish(&report.counters, results.iterations, seconds, b.threads);
internal::Finish(&report.counters, results.iterations, seconds,
b.threads());
}
return report;
}
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id,
ThreadManager* manager) {
internal::ThreadTimer timer;
State st = b->Run(iters, thread_id, &timer, manager);
CHECK(st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
// Adds the stats collected for the thread into manager->results.
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement,
ProfilerManager* profiler_manager_) {
internal::ThreadTimer timer(
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
State st = b->Run(iters, thread_id, &timer, manager,
perf_counters_measurement, profiler_manager_);
if (!(st.skipped() || st.iterations() >= st.max_iterations)) {
st.SkipWithError(
"The benchmark didn't run, nor was it explicitly skipped. Please call "
"'SkipWithXXX` in your benchmark as appropriate.");
}
{
MutexLock l(manager->GetBenchmarkMutex());
internal::ThreadManager::Result& results = manager->results;
@@ -124,225 +160,412 @@ void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id,
manager->NotifyThreadComplete();
}
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
std::vector<BenchmarkReporter::Run>* complexity_reports_)
: b(b_),
complexity_reports(*complexity_reports_),
min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
repeats(b.repetitions != 0 ? b.repetitions
: FLAGS_benchmark_repetitions),
has_explicit_iteration_count(b.iterations != 0),
pool(b.threads - 1),
iters(has_explicit_iteration_count ? b.iterations : 1) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
(b.aggregation_report_mode &
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
}
for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
const bool is_the_first_repetition = repetition_num == 0;
DoOneRepetition(is_the_first_repetition);
}
// Calculate additional statistics
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
// Maybe calculate complexity report
if ((b.complexity != oNone) && b.last_benchmark_instance) {
auto additional_run_stats = ComputeBigO(complexity_reports);
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
additional_run_stats.begin(),
additional_run_stats.end());
complexity_reports.clear();
}
double ComputeMinTime(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (!IsZero(b.min_time())) {
return b.min_time();
}
// If the flag was used to specify number of iters, then return the default
// min_time.
if (iters_or_time.tag == BenchTimeType::ITERS) {
return kDefaultMinTime;
}
RunResults&& get_results() { return std::move(run_results); }
return iters_or_time.time;
}
private:
RunResults run_results;
IterationCount ComputeIters(const benchmark::internal::BenchmarkInstance& b,
const BenchTimeType& iters_or_time) {
if (b.iterations() != 0) {
return b.iterations();
}
const benchmark::internal::BenchmarkInstance& b;
std::vector<BenchmarkReporter::Run>& complexity_reports;
// We've already concluded that this flag is currently used to pass
// iters but do a check here again anyway.
BM_CHECK(iters_or_time.tag == BenchTimeType::ITERS);
return iters_or_time.iters;
}
const double min_time;
const int repeats;
const bool has_explicit_iteration_count;
std::vector<std::thread> pool;
size_t iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
struct IterationResults {
internal::ThreadManager::Result results;
size_t iters;
double seconds;
};
IterationResults DoNIterations() {
VLOG(2) << "Running " << b.name << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads));
class ThreadRunnerDefault : public ThreadRunnerBase {
public:
explicit ThreadRunnerDefault(int num_threads)
: pool(static_cast<size_t>(num_threads - 1)) {}
void RunThreads(const std::function<void(int)>& fn) final {
// Run all but one thread in separate threads
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get());
pool[ti] = std::thread(fn, static_cast<int>(ti + 1));
}
// And run one thread here directly.
// (If we were asked to run just one thread, we don't create new threads.)
// Yes, we need to do this here *after* we start the separate threads.
RunInThread(&b, iters, 0, manager.get());
fn(0);
// The main thread has finished. Now let's wait for the other threads.
manager->WaitForAllThreads();
for (std::thread& thread : pool) thread.join();
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
for (std::thread& thread : pool) {
thread.join();
}
// And get rid of the manager.
manager.reset();
// Adjust real/manual time stats since they were reported per thread.
i.results.real_time_used /= b.threads;
i.results.manual_time_used /= b.threads;
VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// So for how long were we running?
i.iters = iters;
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time) {
i.seconds = i.results.real_time_used;
}
return i;
}
size_t PredictNumItersNeeded(const IterationResults& i) const {
// See how much iterations should be increased by.
// Note: Avoid division by zero with max(seconds, 1ns).
double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
bool is_significant = (i.seconds / min_time) > 0.1;
multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
if (multiplier <= 1.0) multiplier = 2.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const size_t max_next_iters =
0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
// But we do have *some* sanity limits though..
const size_t next_iters = std::min(max_next_iters, kMaxIterations);
VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; // round up before conversion to integer.
}
bool ShouldReportIterationResults(const IterationResults& i) const {
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
return i.results.has_error_ ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >= min_time || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user provided timers are except from this sanity check.
((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
}
void DoOneRepetition(bool is_the_first_repetition) {
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
// Please do note that the if there are repetitions, the iteration count
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
i = DoNIterations();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
// it has calculated the correct iteration time, so we have run that very
// iteration count just now. No need to calculate anything. Just report.
// Else, the normal rules apply.
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
if (results_are_significant) break; // Good, let's report them!
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
// Oh, one last thing, we need to also produce the 'memory measurements'..
MemoryManager::Result memory_result;
size_t memory_iterations = 0;
if (memory_manager != nullptr) {
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<size_t>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
RunInThread(&b, memory_iterations, 0, manager.get());
manager->WaitForAllThreads();
manager.reset();
memory_manager->Stop(&memory_result);
}
// Ok, now actualy report.
BenchmarkReporter::Run report = CreateRunReport(
b, i.results, memory_iterations, memory_result, i.seconds);
if (!report.error_occurred && b.complexity != oNone)
complexity_reports.push_back(report);
run_results.non_aggregates.push_back(report);
}
private:
std::vector<std::thread> pool;
};
std::unique_ptr<ThreadRunnerBase> GetThreadRunner(
const threadrunner_factory& userThreadRunnerFactory, int num_threads) {
return userThreadRunnerFactory
? userThreadRunnerFactory(num_threads)
: std::make_unique<ThreadRunnerDefault>(num_threads);
}
} // end namespace
RunResults RunBenchmark(
const benchmark::internal::BenchmarkInstance& b,
std::vector<BenchmarkReporter::Run>* complexity_reports) {
internal::BenchmarkRunner r(b, complexity_reports);
return r.get_results();
BenchTimeType ParseBenchMinTime(const std::string& value) {
BenchTimeType ret = {};
if (value.empty()) {
ret.tag = BenchTimeType::TIME;
ret.time = 0.0;
return ret;
}
if (value.back() == 'x') {
char* p_end = nullptr;
// Reset errno before it's changed by strtol.
errno = 0;
IterationCount num_iters = std::strtol(value.c_str(), &p_end, 10);
// After a valid parse, p_end should have been set to
// point to the 'x' suffix.
BM_CHECK(errno == 0 && p_end != nullptr && *p_end == 'x')
<< "Malformed iters value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<integer>x.";
ret.tag = BenchTimeType::ITERS;
ret.iters = num_iters;
return ret;
}
bool has_suffix = value.back() == 's';
if (!has_suffix) {
BM_VLOG(0) << "Value passed to --benchmark_min_time should have a suffix. "
"Eg., `30s` for 30-seconds.";
}
char* p_end = nullptr;
// Reset errno before it's changed by strtod.
errno = 0;
double min_time = std::strtod(value.c_str(), &p_end);
// After a successful parse, p_end should point to the suffix 's',
// or the end of the string if the suffix was omitted.
BM_CHECK(errno == 0 && p_end != nullptr &&
((has_suffix && *p_end == 's') || *p_end == '\0'))
<< "Malformed seconds value passed to --benchmark_min_time: `" << value
<< "`. Expected --benchmark_min_time=<float>x.";
ret.tag = BenchTimeType::TIME;
ret.time = min_time;
return ret;
}
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)),
min_time(FLAGS_benchmark_dry_run
? 0
: ComputeMinTime(b_, parsed_benchtime_flag)),
min_warmup_time(
FLAGS_benchmark_dry_run
? 0
: ((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0)
? b.min_warmup_time()
: FLAGS_benchmark_min_warmup_time)),
warmup_done(FLAGS_benchmark_dry_run ? true : !(min_warmup_time > 0.0)),
repeats(FLAGS_benchmark_dry_run
? 1
: (b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions)),
has_explicit_iteration_count(b.iterations() != 0 ||
parsed_benchtime_flag.tag ==
BenchTimeType::ITERS),
thread_runner(
GetThreadRunner(b.GetUserThreadRunnerFactory(), b.threads())),
iters(FLAGS_benchmark_dry_run
? 1
: (has_explicit_iteration_count
? ComputeIters(b_, parsed_benchtime_flag)
: 1)),
perf_counters_measurement_ptr(pcm_) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
((b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly) != 0u);
run_results.file_report_aggregates_only =
((b.aggregation_report_mode() &
internal::ARM_FileReportAggregatesOnly) != 0u);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
(perf_counters_measurement_ptr->num_counters() == 0))
<< "Perf counters were requested but could not be set up.";
}
}
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads()));
thread_runner->RunThreads([&](int thread_idx) {
RunInThread(&b, iters, thread_idx, manager.get(),
perf_counters_measurement_ptr, /*profiler_manager=*/nullptr);
});
IterationResults i;
// Acquire the measurements/counters from the manager, UNDER THE LOCK!
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
}
// And get rid of the manager.
manager.reset();
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
// By using KeepRunningBatch a benchmark can iterate more times than
// requested, so take the iteration count from i.results.
i.iters = i.results.iterations / b.threads();
// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time()) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time()) {
i.seconds = i.results.real_time_used;
}
return i;
}
IterationCount BenchmarkRunner::PredictNumItersNeeded(
const IterationResults& i) const {
// See how much iterations should be increased by.
// Note: Avoid division by zero with max(seconds, 1ns).
double multiplier = GetMinTimeToApply() * 1.4 / std::max(i.seconds, 1e-9);
// If our last run was at least 10% of FLAGS_benchmark_min_time then we
// use the multiplier directly.
// Otherwise we use at most 10 times expansion.
// NOTE: When the last run was at least 10% of the min time the max
// expansion should be 14x.
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
multiplier = is_significant ? multiplier : 10.0;
// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
std::llround(std::max(multiplier * static_cast<double>(i.iters),
static_cast<double>(i.iters) + 1.0)));
// But we do have *some* limits though..
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; // round up before conversion to integer.
}
bool BenchmarkRunner::ShouldReportIterationResults(
const IterationResults& i) const {
// Determine if this run should be reported;
// Either it has run for a sufficient amount of time
// or because an error was reported.
return (i.results.skipped_ != 0u) || FLAGS_benchmark_dry_run ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user provided timers are except from this test.
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
!b.use_manual_time());
}
double BenchmarkRunner::GetMinTimeToApply() const {
// In order to re-use functionality to run and measure benchmarks for running
// a warmup phase of the benchmark, we need a way of telling whether to apply
// min_time or min_warmup_time. This function will figure out if we are in the
// warmup phase and therefore need to apply min_warmup_time or if we already
// in the benchmarking phase and min_time needs to be applied.
return warmup_done ? min_time : min_warmup_time;
}
void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
warmup_done = true;
iters = i;
}
void BenchmarkRunner::RunWarmUp() {
// Use the same mechanisms for warming up the benchmark as used for actually
// running and measuring the benchmark.
IterationResults i_warmup;
// Dont use the iterations determined in the warmup phase for the actual
// measured benchmark phase. While this may be a good starting point for the
// benchmark and it would therefore get rid of the need to figure out how many
// iterations are needed if min_time is set again, this may also be a complete
// wrong guess since the warmup loops might be considerably slower (e.g
// because of caching effects).
const IterationCount i_backup = iters;
for (;;) {
b.Setup();
i_warmup = DoNIterations();
b.Teardown();
const bool finish = ShouldReportIterationResults(i_warmup);
if (finish) {
FinishWarmUp(i_backup);
break;
}
// Although we are running "only" a warmup phase where running enough
// iterations at once without measuring time isn't as important as it is for
// the benchmarking phase, we still do it the same way as otherwise it is
// very confusing for the user to know how to choose a proper value for
// min_warmup_time if a different approach on running it is used.
iters = PredictNumItersNeeded(i_warmup);
assert(iters > i_warmup.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
}
MemoryManager::Result BenchmarkRunner::RunMemoryManager(
IterationCount memory_iterations) {
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr,
/*profiler_manager=*/nullptr);
manager.reset();
b.Teardown();
MemoryManager::Result memory_result;
memory_manager->Stop(memory_result);
memory_result.memory_iterations = memory_iterations;
return memory_result;
}
void BenchmarkRunner::RunProfilerManager(IterationCount profile_iterations) {
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, profile_iterations, 0, manager.get(),
/*perf_counters_measurement_ptr=*/nullptr,
/*profiler_manager=*/profiler_manager);
manager.reset();
b.Teardown();
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
const bool is_the_first_repetition = num_repetitions_done == 0;
// In case a warmup phase is requested by the benchmark, run it now.
// After running the warmup phase the BenchmarkRunner should be in a state as
// this warmup never happened except the fact that warmup_done is set. Every
// other manipulation of the BenchmarkRunner instance would be a bug! Please
// fix it.
if (!warmup_done) {
RunWarmUp();
}
IterationResults i;
// We *may* be gradually increasing the length (iteration count)
// of the benchmark until we decide the results are significant.
// And once we do, we report those last results and exit.
// Please do note that the if there are repetitions, the iteration count
// is *only* calculated for the *first* repetition, and other repetitions
// simply use that precomputed iteration count.
for (;;) {
b.Setup();
i = DoNIterations();
b.Teardown();
// Do we consider the results to be significant?
// If we are doing repetitions, and the first repetition was already done,
// it has calculated the correct iteration time, so we have run that very
// iteration count just now. No need to calculate anything. Just report.
// Else, the normal rules apply.
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
// Good, let's report them!
if (results_are_significant) {
break;
}
// Nope, bad iteration. Let's re-estimate the hopefully-sufficient
// iteration count, and run the benchmark again...
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
// Produce memory measurements if requested.
MemoryManager::Result memory_result;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
// Only run a few iterations to reduce the impact of one-time
// allocations in benchmarks that are not properly managed.
memory_iterations = std::min<IterationCount>(16, iters);
memory_result = RunMemoryManager(memory_iterations);
}
if (profiler_manager != nullptr) {
// We want to externally profile the benchmark for the same number of
// iterations because, for example, if we're tracing the benchmark then we
// want trace data to reasonably match PMU data.
RunProfilerManager(iters);
}
// Ok, now actually report.
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
num_repetitions_done, repeats);
if (reports_for_family != nullptr) {
++reports_for_family->num_runs_done;
if (report.skipped == 0u) {
reports_for_family->Runs.push_back(report);
}
}
run_results.non_aggregates.push_back(report);
++num_repetitions_done;
}
RunResults&& BenchmarkRunner::GetResults() {
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
// Calculate additional statistics over the repetitions of this instance.
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
return std::move(run_results);
}
} // end namespace internal

View File

@@ -15,22 +15,20 @@
#ifndef BENCHMARK_RUNNER_H_
#define BENCHMARK_RUNNER_H_
#include <memory>
#include <thread>
#include <vector>
#include "benchmark_api_internal.h"
#include "internal_macros.h"
DECLARE_double(benchmark_min_time);
DECLARE_int32(benchmark_repetitions);
DECLARE_bool(benchmark_report_aggregates_only);
DECLARE_bool(benchmark_display_aggregates_only);
#include "perf_counters.h"
#include "thread_manager.h"
namespace benchmark {
namespace internal {
extern MemoryManager* memory_manager;
extern ProfilerManager* profiler_manager;
struct RunResults {
std::vector<BenchmarkReporter::Run> non_aggregates;
@@ -40,9 +38,87 @@ struct RunResults {
bool file_report_aggregates_only = false;
};
RunResults RunBenchmark(
const benchmark::internal::BenchmarkInstance& b,
std::vector<BenchmarkReporter::Run>* complexity_reports);
struct BENCHMARK_EXPORT BenchTimeType {
enum { UNSPECIFIED, ITERS, TIME } tag;
union {
IterationCount iters;
double time;
};
};
BENCHMARK_EXPORT
BenchTimeType ParseBenchMinTime(const std::string& value);
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
benchmark::internal::PerfCountersMeasurement* pcm_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family);
int GetNumRepeats() const { return repeats; }
bool HasRepeatsRemaining() const {
return GetNumRepeats() != num_repetitions_done;
}
void DoOneRepetition();
RunResults&& GetResults();
BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
return reports_for_family;
}
double GetMinTime() const { return min_time; }
bool HasExplicitIters() const { return has_explicit_iteration_count; }
IterationCount GetIters() const { return iters; }
private:
RunResults run_results;
const benchmark::internal::BenchmarkInstance& b;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;
BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_warmup_time;
bool warmup_done;
const int repeats;
const bool has_explicit_iteration_count;
int num_repetitions_done = 0;
std::unique_ptr<ThreadRunnerBase> thread_runner;
IterationCount iters; // preserved between repetitions!
// So only the first repetition has to find/calculate it,
// the other repetitions will just use that precomputed iteration count.
PerfCountersMeasurement* const perf_counters_measurement_ptr = nullptr;
struct IterationResults {
internal::ThreadManager::Result results;
IterationCount iters;
double seconds;
};
IterationResults DoNIterations();
MemoryManager::Result RunMemoryManager(IterationCount memory_iterations);
void RunProfilerManager(IterationCount profile_iterations);
IterationCount PredictNumItersNeeded(const IterationResults& i) const;
bool ShouldReportIterationResults(const IterationResults& i) const;
double GetMinTimeToApply() const;
void FinishWarmUp(const IterationCount& i);
void RunWarmUp();
};
} // namespace internal

14
third_party/benchmark/src/check.cc vendored Normal file
View File

@@ -0,0 +1,14 @@
#include "check.h"
namespace benchmark {
namespace internal {
namespace {
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
AbortHandlerT* handler = &std::abort;
} // namespace
BENCHMARK_EXPORT AbortHandlerT*& GetAbortHandler() { return handler; }
} // namespace internal
} // namespace benchmark

View File

@@ -4,30 +4,51 @@
#include <cmath>
#include <cstdlib>
#include <ostream>
#include <string_view>
#include "benchmark/export.h"
#include "internal_macros.h"
#include "log.h"
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
namespace benchmark {
namespace internal {
typedef void(AbortHandlerT)();
inline AbortHandlerT*& GetAbortHandler() {
static AbortHandlerT* handler = &std::abort;
return handler;
}
BENCHMARK_EXPORT
AbortHandlerT*& GetAbortHandler();
BENCHMARK_NORETURN inline void CallAbortHandler() {
GetAbortHandler()();
std::flush(std::cout);
std::flush(std::cerr);
std::abort(); // fallback to enforce noreturn
}
// CheckHandler is the class constructed by failing CHECK macros. CheckHandler
// will log information about the failures and abort when it is destructed.
// CheckHandler is the class constructed by failing BM_CHECK macros.
// CheckHandler will log information about the failures and abort when it is
// destructed.
class CheckHandler {
public:
CheckHandler(const char* check, const char* file, const char* func, int line)
CheckHandler(std::string_view check, std::string_view file,
std::string_view func, int line)
: log_(GetErrorLogInstance()) {
log_ << file << ":" << line << ": " << func << ": Check `" << check
<< "' failed. ";
@@ -35,10 +56,17 @@ class CheckHandler {
LogType& GetLog() { return log_; }
#if defined(COMPILER_MSVC)
#pragma warning(push)
#pragma warning(disable : 4722)
#endif
BENCHMARK_NORETURN ~CheckHandler() BENCHMARK_NOEXCEPT_OP(false) {
log_ << std::endl;
log_ << '\n';
CallAbortHandler();
}
#if defined(COMPILER_MSVC)
#pragma warning(pop)
#endif
CheckHandler& operator=(const CheckHandler&) = delete;
CheckHandler(const CheckHandler&) = delete;
@@ -51,32 +79,34 @@ class CheckHandler {
} // end namespace internal
} // end namespace benchmark
// The CHECK macro returns a std::ostream object that can have extra information
// written to it.
// The BM_CHECK macro returns a std::ostream object that can have extra
// information written to it.
#ifndef NDEBUG
#define CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler(#b, __FILE__, __func__, __LINE__) \
#define BM_CHECK(b) \
(b ? ::benchmark::internal::GetNullLogInstance() \
: ::benchmark::internal::CheckHandler( \
std::string_view(#b), std::string_view(__FILE__), \
std::string_view(__func__), __LINE__) \
.GetLog())
#else
#define CHECK(b) ::benchmark::internal::GetNullLogInstance()
#define BM_CHECK(b) ::benchmark::internal::GetNullLogInstance()
#endif
// clang-format off
// preserve whitespacing between operators for alignment
#define CHECK_EQ(a, b) CHECK((a) == (b))
#define CHECK_NE(a, b) CHECK((a) != (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
#define CHECK_LE(a, b) CHECK((a) <= (b))
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_LT(a, b) CHECK((a) < (b))
#define BM_CHECK_EQ(a, b) BM_CHECK((a) == (b))
#define BM_CHECK_NE(a, b) BM_CHECK((a) != (b))
#define BM_CHECK_GE(a, b) BM_CHECK((a) >= (b))
#define BM_CHECK_LE(a, b) BM_CHECK((a) <= (b))
#define BM_CHECK_GT(a, b) BM_CHECK((a) > (b))
#define BM_CHECK_LT(a, b) BM_CHECK((a) < (b))
#define CHECK_FLOAT_EQ(a, b, eps) CHECK(std::fabs((a) - (b)) < (eps))
#define CHECK_FLOAT_NE(a, b, eps) CHECK(std::fabs((a) - (b)) >= (eps))
#define CHECK_FLOAT_GE(a, b, eps) CHECK((a) - (b) > -(eps))
#define CHECK_FLOAT_LE(a, b, eps) CHECK((b) - (a) > -(eps))
#define CHECK_FLOAT_GT(a, b, eps) CHECK((a) - (b) > (eps))
#define CHECK_FLOAT_LT(a, b, eps) CHECK((b) - (a) > (eps))
#define BM_CHECK_FLOAT_EQ(a, b, eps) BM_CHECK(std::fabs((a) - (b)) < (eps))
#define BM_CHECK_FLOAT_NE(a, b, eps) BM_CHECK(std::fabs((a) - (b)) >= (eps))
#define BM_CHECK_FLOAT_GE(a, b, eps) BM_CHECK((a) - (b) > -(eps))
#define BM_CHECK_FLOAT_LE(a, b, eps) BM_CHECK((b) - (a) > -(eps))
#define BM_CHECK_FLOAT_GT(a, b, eps) BM_CHECK((a) - (b) > (eps))
#define BM_CHECK_FLOAT_LT(a, b, eps) BM_CHECK((b) - (a) > (eps))
//clang-format on
#endif // CHECK_H_

View File

@@ -25,8 +25,8 @@
#include "internal_macros.h"
#ifdef BENCHMARK_OS_WINDOWS
#include <windows.h>
#include <io.h>
#include <windows.h>
#else
#include <unistd.h>
#endif // BENCHMARK_OS_WINDOWS
@@ -94,20 +94,20 @@ std::string FormatString(const char* msg, va_list args) {
va_end(args_cp);
// currently there is no error handling for failure, so this is hack.
CHECK(ret >= 0);
BM_CHECK(ret >= 0);
if (ret == 0) // handle empty expansion
if (ret == 0) { // handle empty expansion
return {};
else if (static_cast<size_t>(ret) < size)
return local_buff;
else {
// we did not provide a long enough buffer on our first attempt.
size = (size_t)ret + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
ret = vsnprintf(buff.get(), size, msg, args);
CHECK(ret > 0 && ((size_t)ret) < size);
return buff.get();
}
if (static_cast<size_t>(ret) < size) {
return local_buff;
}
// we did not provide a long enough buffer on our first attempt.
size = static_cast<size_t>(ret) + 1; // + 1 for the null byte
std::unique_ptr<char[]> buff(new char[size]);
ret = vsnprintf(buff.get(), size, msg, args);
BM_CHECK(ret > 0 && (static_cast<size_t>(ret)) < size);
return buff.get();
}
std::string FormatString(const char* msg, ...) {
@@ -135,22 +135,30 @@ void ColorPrintf(std::ostream& out, LogColor color, const char* fmt,
// Gets the current text color.
CONSOLE_SCREEN_BUFFER_INFO buffer_info;
GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
const WORD old_color_attrs = buffer_info.wAttributes;
const WORD original_color_attrs = buffer_info.wAttributes;
// We need to flush the stream buffers into the console before each
// SetConsoleTextAttribute call lest it affect the text that is already
// printed but has not yet reached the console.
fflush(stdout);
SetConsoleTextAttribute(stdout_handle,
GetPlatformColorCode(color) | FOREGROUND_INTENSITY);
vprintf(fmt, args);
out.flush();
fflush(stdout);
// Restores the text color.
SetConsoleTextAttribute(stdout_handle, old_color_attrs);
const WORD original_background_attrs =
original_color_attrs & (BACKGROUND_RED | BACKGROUND_GREEN |
BACKGROUND_BLUE | BACKGROUND_INTENSITY);
SetConsoleTextAttribute(stdout_handle, GetPlatformColorCode(color) |
FOREGROUND_INTENSITY |
original_background_attrs);
out << FormatString(fmt, args);
out.flush();
// Restores the text and background color.
SetConsoleTextAttribute(stdout_handle, original_color_attrs);
#else
const char* color_code = GetPlatformColorCode(color);
if (color_code) out << FormatString("\033[0;3%sm", color_code);
if (color_code != nullptr) {
out << FormatString("\033[0;3%sm", color_code);
}
out << FormatString(fmt, args) << "\033[m";
#endif
}
@@ -163,19 +171,31 @@ bool IsColorTerminal() {
#else
// On non-Windows platforms, we rely on the TERM variable. This list of
// supported TERM values is copied from Google Test:
// <https://github.com/google/googletest/blob/master/googletest/src/gtest.cc#L2925>.
// <https://github.com/google/googletest/blob/v1.13.0/googletest/src/gtest.cc#L3225-L3259>.
const char* const SUPPORTED_TERM_VALUES[] = {
"xterm", "xterm-color", "xterm-256color",
"screen", "screen-256color", "tmux",
"tmux-256color", "rxvt-unicode", "rxvt-unicode-256color",
"linux", "cygwin",
"xterm",
"xterm-color",
"xterm-256color",
"screen",
"screen-256color",
"tmux",
"tmux-256color",
"rxvt-unicode",
"rxvt-unicode-256color",
"linux",
"cygwin",
"xterm-kitty",
"alacritty",
"foot",
"foot-extra",
"wezterm",
};
const char* const term = getenv("TERM");
bool term_supports_color = false;
for (const char* candidate : SUPPORTED_TERM_VALUES) {
if (term && 0 == strcmp(term, candidate)) {
if ((term != nullptr) && 0 == strcmp(term, candidate)) {
term_supports_color = true;
break;
}

View File

@@ -14,13 +14,20 @@
#include "commandlineflags.h"
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>
#include <map>
#include <utility>
#include "../src/string_util.h"
namespace benchmark {
namespace {
// Parses 'str' for a 32-bit signed integer. If successful, writes
// the result to *value and returns true; otherwise leaves *value
// unchanged and returns false.
@@ -75,57 +82,101 @@ bool ParseDouble(const std::string& src_text, const char* str, double* value) {
return true;
}
// Parses 'str' into KV pairs. If successful, writes the result to *value and
// returns true; otherwise leaves *value unchanged and returns false.
bool ParseKvPairs(const std::string& src_text, const char* str,
std::map<std::string, std::string>* value) {
std::map<std::string, std::string> kvs;
for (const auto& kvpair : StrSplit(str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) {
std::cerr << src_text << " is expected to be a comma-separated list of "
<< "<key>=<value> strings, but actually has value \"" << str
<< "\".\n";
return false;
}
if (!kvs.emplace(kv[0], kv[1]).second) {
std::cerr << src_text << " is expected to contain unique keys but key \""
<< kv[0] << "\" was repeated.\n";
return false;
}
}
*value = kvs;
return true;
}
// Returns the name of the environment variable corresponding to the
// given flag. For example, FlagToEnvVar("foo") will return
// "BENCHMARK_FOO" in the open-source version.
static std::string FlagToEnvVar(const char* flag) {
std::string FlagToEnvVar(const char* flag) {
const std::string flag_str(flag);
std::string env_var;
for (size_t i = 0; i != flag_str.length(); ++i)
for (size_t i = 0; i != flag_str.length(); ++i) {
env_var += static_cast<char>(::toupper(flag_str.c_str()[i]));
return "BENCHMARK_" + env_var;
}
// Reads and returns the Boolean environment variable corresponding to
// the given flag; if it's not set, returns default_value.
//
// The value is considered true iff it's not "0".
bool BoolFromEnv(const char* flag, bool default_value) {
const std::string env_var = FlagToEnvVar(flag);
const char* const string_value = getenv(env_var.c_str());
return string_value == nullptr ? default_value
: strcmp(string_value, "0") != 0;
}
// Reads and returns a 32-bit integer stored in the environment
// variable corresponding to the given flag; if it isn't set or
// doesn't represent a valid 32-bit integer, returns default_value.
int32_t Int32FromEnv(const char* flag, int32_t default_value) {
const std::string env_var = FlagToEnvVar(flag);
const char* const string_value = getenv(env_var.c_str());
if (string_value == nullptr) {
// The environment variable is not set.
return default_value;
}
int32_t result = default_value;
if (!ParseInt32(std::string("Environment variable ") + env_var, string_value,
&result)) {
std::cout << "The default value " << default_value << " is used.\n";
return default_value;
}
return result;
return env_var;
}
// Reads and returns the string environment variable corresponding to
// the given flag; if it's not set, returns default_value.
const char* StringFromEnv(const char* flag, const char* default_value) {
} // namespace
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
return value_str == nullptr ? default_val : IsTruthyFlagValue(value_str);
}
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
int32_t value = default_val;
if (value_str == nullptr ||
!ParseInt32(std::string("Environment variable ") + env_var, value_str,
&value)) {
return default_val;
}
return value;
}
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
double value = default_val;
if (value_str == nullptr ||
!ParseDouble(std::string("Environment variable ") + env_var, value_str,
&value)) {
return default_val;
}
return value;
}
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value = getenv(env_var.c_str());
return value == nullptr ? default_value : value;
return value == nullptr ? default_val : value;
}
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val) {
const std::string env_var = FlagToEnvVar(flag);
const char* const value_str = getenv(env_var.c_str());
if (value_str == nullptr) {
return default_val;
}
std::map<std::string, std::string> value;
if (!ParseKvPairs("Environment variable " + env_var, value_str, &value)) {
return default_val;
}
return value;
}
// Parses a string as a command line flag. The string should have
@@ -136,83 +187,135 @@ const char* StringFromEnv(const char* flag, const char* default_value) {
const char* ParseFlagValue(const char* str, const char* flag,
bool def_optional) {
// str and flag must not be nullptr.
if (str == nullptr || flag == nullptr) return nullptr;
if (str == nullptr || flag == nullptr) {
return nullptr;
}
// The flag must start with "--".
const std::string flag_str = std::string("--") + std::string(flag);
const size_t flag_len = flag_str.length();
if (strncmp(str, flag_str.c_str(), flag_len) != 0) return nullptr;
if (strncmp(str, flag_str.c_str(), flag_len) != 0) {
return nullptr;
}
// Skips the flag name.
const char* flag_end = str + flag_len;
// When def_optional is true, it's OK to not have a "=value" part.
if (def_optional && (flag_end[0] == '\0')) return flag_end;
if (def_optional && (flag_end[0] == '\0')) {
return flag_end;
}
// If def_optional is true and there are more characters after the
// flag name, or if def_optional is false, there must be a '=' after
// the flag name.
if (flag_end[0] != '=') return nullptr;
if (flag_end[0] != '=') {
return nullptr;
}
// Returns the string after "=".
return flag_end + 1;
}
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, true);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Converts the string value to a bool.
*value = IsTruthyFlagValue(value_str);
return true;
}
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseInt32(std::string("The value of flag --") + flag, value_str,
value);
}
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
// Sets *value to the value of the flag.
return ParseDouble(std::string("The value of flag --") + flag, value_str,
value);
}
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value) {
// Gets the value of the flag as a string.
const char* const value_str = ParseFlagValue(str, flag, false);
// Aborts if the parsing failed.
if (value_str == nullptr) return false;
if (value_str == nullptr) {
return false;
}
*value = value_str;
return true;
}
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value) {
const char* const value_str = ParseFlagValue(str, flag, false);
if (value_str == nullptr) {
return false;
}
for (const auto& kvpair : StrSplit(value_str, ',')) {
const auto kv = StrSplit(kvpair, '=');
if (kv.size() != 2) {
return false;
}
value->emplace(kv[0], kv[1]);
}
return true;
}
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag) {
return (ParseFlagValue(str, flag, true) != nullptr);
}
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value) {
if (value.empty()) return true;
char ch = value[0];
return isalnum(ch) &&
!(ch == '0' || ch == 'f' || ch == 'F' || ch == 'n' || ch == 'N');
if (value.size() == 1) {
char v = value[0];
return isalnum(v) &&
!(v == '0' || v == 'f' || v == 'F' || v == 'n' || v == 'N');
}
if (!value.empty()) {
std::string value_lower(value);
std::transform(value_lower.begin(), value_lower.end(), value_lower.begin(),
[](char c) { return static_cast<char>(::tolower(c)); });
return !(value_lower == "false" || value_lower == "no" ||
value_lower == "off");
}
return true;
}
} // end namespace benchmark

View File

@@ -2,39 +2,84 @@
#define BENCHMARK_COMMANDLINEFLAGS_H_
#include <cstdint>
#include <map>
#include <string>
#include "benchmark/export.h"
// Macro for referencing flags.
#define FLAG(name) FLAGS_##name
// Macros for declaring flags.
#define DECLARE_bool(name) extern bool FLAG(name)
#define DECLARE_int32(name) extern int32_t FLAG(name)
#define DECLARE_int64(name) extern int64_t FLAG(name)
#define DECLARE_double(name) extern double FLAG(name)
#define DECLARE_string(name) extern std::string FLAG(name)
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DECLARE_bool(name) BENCHMARK_EXPORT extern bool FLAG(name)
#define BM_DECLARE_int32(name) BENCHMARK_EXPORT extern int32_t FLAG(name)
#define BM_DECLARE_double(name) BENCHMARK_EXPORT extern double FLAG(name)
#define BM_DECLARE_string(name) BENCHMARK_EXPORT extern std::string FLAG(name)
#define BM_DECLARE_kvpairs(name) \
BENCHMARK_EXPORT extern std::map<std::string, std::string> FLAG(name)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
// Macros for defining flags.
#define DEFINE_bool(name, default_val, doc) bool FLAG(name) = (default_val)
#define DEFINE_int32(name, default_val, doc) int32_t FLAG(name) = (default_val)
#define DEFINE_int64(name, default_val, doc) int64_t FLAG(name) = (default_val)
#define DEFINE_double(name, default_val, doc) double FLAG(name) = (default_val)
#define DEFINE_string(name, default_val, doc) \
std::string FLAG(name) = (default_val)
// NOLINTBEGIN(cppcoreguidelines-avoid-non-const-global-variables)
#define BM_DEFINE_bool(name, default_val) \
BENCHMARK_EXPORT bool FLAG(name) = benchmark::BoolFromEnv(#name, default_val)
#define BM_DEFINE_int32(name, default_val) \
BENCHMARK_EXPORT int32_t FLAG(name) = \
benchmark::Int32FromEnv(#name, default_val)
#define BM_DEFINE_double(name, default_val) \
BENCHMARK_EXPORT double FLAG(name) = \
benchmark::DoubleFromEnv(#name, default_val)
#define BM_DEFINE_string(name, default_val) \
BENCHMARK_EXPORT std::string FLAG(name) = \
benchmark::StringFromEnv(#name, default_val)
#define BM_DEFINE_kvpairs(name, default_val) \
BENCHMARK_EXPORT std::map<std::string, std::string> FLAG(name) = \
benchmark::KvPairsFromEnv(#name, default_val)
// NOLINTEND(cppcoreguidelines-avoid-non-const-global-variables)
namespace benchmark {
// Parses 'str' for a 32-bit signed integer. If successful, writes the result
// to *value and returns true; otherwise leaves *value unchanged and returns
// false.
bool ParseInt32(const std::string& src_text, const char* str, int32_t* value);
// Parses a bool/Int32/string from the environment variable
// corresponding to the given Google Test flag.
// Parses a bool from the environment variable corresponding to the given flag.
//
// If the variable exists, returns IsTruthyFlagValue() value; if not,
// returns the given default value.
BENCHMARK_EXPORT
bool BoolFromEnv(const char* flag, bool default_val);
// Parses an Int32 from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseInt32() value; if not, returns
// the given default value.
BENCHMARK_EXPORT
int32_t Int32FromEnv(const char* flag, int32_t default_val);
// Parses an Double from the environment variable corresponding to the given
// flag.
//
// If the variable exists, returns ParseDouble(); if not, returns
// the given default value.
BENCHMARK_EXPORT
double DoubleFromEnv(const char* flag, double default_val);
// Parses a string from the environment variable corresponding to the given
// flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
const char* StringFromEnv(const char* flag, const char* default_val);
// Parses a set of kvpairs from the environment variable corresponding to the
// given flag.
//
// If variable exists, returns its value; if not, returns
// the given default value.
BENCHMARK_EXPORT
std::map<std::string, std::string> KvPairsFromEnv(
const char* flag, std::map<std::string, std::string> default_val);
// Parses a string for a bool flag, in the form of either
// "--flag=value" or "--flag".
//
@@ -44,36 +89,49 @@ const char* StringFromEnv(const char* flag, const char* default_val);
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseBoolFlag(const char* str, const char* flag, bool* value);
// Parses a string for an Int32 flag, in the form of
// "--flag=value".
// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseInt32Flag(const char* str, const char* flag, int32_t* value);
// Parses a string for a Double flag, in the form of
// "--flag=value".
// Parses a string for a Double flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseDoubleFlag(const char* str, const char* flag, double* value);
// Parses a string for a string flag, in the form of
// "--flag=value".
// Parses a string for a string flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true. On failure, returns false without changing *value.
BENCHMARK_EXPORT
bool ParseStringFlag(const char* str, const char* flag, std::string* value);
// Parses a string for a kvpairs flag in the form "--flag=key=value,key=value"
//
// On success, stores the value of the flag in *value and returns true. On
// failure returns false, though *value may have been mutated.
BENCHMARK_EXPORT
bool ParseKeyValueFlag(const char* str, const char* flag,
std::map<std::string, std::string>* value);
// Returns true if the string matches the flag.
BENCHMARK_EXPORT
bool IsFlag(const char* str, const char* flag);
// Returns true unless value starts with one of: '0', 'f', 'F', 'n' or 'N', or
// some non-alphanumeric character. As a special case, also returns true if
// value is the empty string.
// some non-alphanumeric character. Also returns false if the value matches
// one of 'no', 'false', 'off' (case-insensitive). As a special case, also
// returns true if value is the empty string.
BENCHMARK_EXPORT
bool IsTruthyFlagValue(const std::string& value);
} // end namespace benchmark
#endif // BENCHMARK_COMMANDLINEFLAGS_H_

View File

@@ -15,34 +15,36 @@
// Source project : https://github.com/ismaelJimenez/cpp.leastsq
// Adapted to be used with google benchmark
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cmath>
#include "benchmark/benchmark.h"
#include "check.h"
#include "complexity.h"
namespace benchmark {
// Internal function to calculate the different scalability forms
BigOFunc* FittingCurve(BigO complexity) {
static const double kLog2E = 1.44269504088896340736;
switch (complexity) {
case oN:
return [](int64_t n) -> double { return static_cast<double>(n); };
return [](IterationCount n) -> double { return static_cast<double>(n); };
case oNSquared:
return [](int64_t n) -> double { return std::pow(n, 2); };
return [](IterationCount n) -> double { return std::pow(n, 2); };
case oNCubed:
return [](int64_t n) -> double { return std::pow(n, 3); };
return [](IterationCount n) -> double { return std::pow(n, 3); };
case oLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
return [](int64_t n) { return kLog2E * log(static_cast<double>(n)); };
return [](IterationCount n) -> double {
return std::log2(static_cast<double>(n));
};
case oNLogN:
/* Note: can't use log2 because Android's GNU STL lacks it */
return [](int64_t n) { return kLog2E * n * log(static_cast<double>(n)); };
return [](IterationCount n) -> double {
return static_cast<double>(n) * std::log2(static_cast<double>(n));
};
case o1:
default:
return [](int64_t) { return 1.0; };
return [](IterationCount) { return 1.0; };
}
}
@@ -71,15 +73,14 @@ std::string GetBigOString(BigO complexity) {
// given by the lambda expression.
// - n : Vector containing the size of the benchmark tests.
// - time : Vector containing the times for the benchmark tests.
// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
// - fitting_curve : lambda expression (e.g. [](ComplexityN n) {return n; };).
// For a deeper explanation on the algorithm logic, look the README file at
// http://github.com/ismaelJimenez/Minimal-Cpp-Least-Squared-Fit
// For a deeper explanation on the algorithm logic, please refer to
// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time,
BigOFunc* fitting_curve) {
double sigma_gn = 0.0;
double sigma_gn_squared = 0.0;
double sigma_time = 0.0;
double sigma_time_gn = 0.0;
@@ -87,7 +88,6 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// Calculate least square fitting parameter
for (size_t i = 0; i < n.size(); ++i) {
double gn_i = fitting_curve(n[i]);
sigma_gn += gn_i;
sigma_gn_squared += gn_i * gn_i;
sigma_time += time[i];
sigma_time_gn += time[i] * gn_i;
@@ -103,12 +103,12 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
double rms = 0.0;
for (size_t i = 0; i < n.size(); ++i) {
double fit = result.coef * fitting_curve(n[i]);
rms += pow((time[i] - fit), 2);
rms += std::pow((time[i] - fit), 2);
}
// Normalized RMS by the mean of the observed values
double mean = sigma_time / n.size();
result.rms = sqrt(rms / n.size()) / mean;
double mean = sigma_time / static_cast<double>(n.size());
result.rms = std::sqrt(rms / static_cast<double>(n.size())) / mean;
return result;
}
@@ -120,12 +120,12 @@ LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
// - complexity : If different than oAuto, the fitting curve will stick to
// this one. If it is oAuto, it will be calculated the best
// fitting curve.
LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
LeastSq MinimalLeastSq(const std::vector<ComplexityN>& n,
const std::vector<double>& time, const BigO complexity) {
CHECK_EQ(n.size(), time.size());
CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
// benchmark runs are given
CHECK_NE(complexity, oNone);
BM_CHECK_EQ(n.size(), time.size());
BM_CHECK_GE(n.size(), 2); // Do not compute fitting curve is less than two
// benchmark runs are given
BM_CHECK_NE(complexity, oNone);
LeastSq best_fit;
@@ -157,19 +157,24 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
typedef BenchmarkReporter::Run Run;
std::vector<Run> results;
if (reports.size() < 2) return results;
if (reports.size() < 2) {
return results;
}
// Accumulators.
std::vector<int64_t> n;
std::vector<ComplexityN> n;
std::vector<double> real_time;
std::vector<double> cpu_time;
// Populate the accumulators.
for (const Run& run : reports) {
CHECK_GT(run.complexity_n, 0) << "Did you forget to call SetComplexityN?";
BM_CHECK_GT(run.complexity_n, 0)
<< "Did you forget to call SetComplexityN?";
n.push_back(run.complexity_n);
real_time.push_back(run.real_accumulated_time / run.iterations);
cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
real_time.push_back(run.real_accumulated_time /
static_cast<double>(run.iterations));
cpu_time.push_back(run.cpu_accumulated_time /
static_cast<double>(run.iterations));
}
LeastSq result_cpu;
@@ -179,18 +184,37 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
} else {
result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
const BigO* InitialBigO = &reports[0].complexity;
const bool use_real_time_for_initial_big_o =
reports[0].use_real_time_for_initial_big_o;
if (use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
InitialBigO = &result_real.complexity;
// The Big-O complexity for CPU time must have the same Big-O function!
}
result_cpu = MinimalLeastSq(n, cpu_time, *InitialBigO);
InitialBigO = &result_cpu.complexity;
if (!use_real_time_for_initial_big_o) {
result_real = MinimalLeastSq(n, real_time, *InitialBigO);
}
}
std::string run_name = reports[0].benchmark_name().substr(
0, reports[0].benchmark_name().find('/'));
// Drop the 'args' when reporting complexity.
auto run_name = reports[0].run_name;
run_name.args.clear();
// Get the data from the accumulator to BenchmarkReporter::Run's.
Run big_o;
big_o.run_name = run_name;
big_o.family_index = reports[0].family_index;
big_o.per_family_instance_index = reports[0].per_family_instance_index;
big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
big_o.repetitions = reports[0].repetitions;
big_o.repetition_index = Run::no_repetition_index;
big_o.threads = reports[0].threads;
big_o.aggregate_name = "BigO";
big_o.aggregate_unit = StatisticUnit::kTime;
big_o.report_label = reports[0].report_label;
big_o.iterations = 0;
big_o.real_accumulated_time = result_real.coef;
big_o.cpu_accumulated_time = result_cpu.coef;
@@ -207,11 +231,16 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
// Only add label to mean/stddev if it is same for all runs
Run rms;
rms.run_name = run_name;
big_o.report_label = reports[0].report_label;
rms.family_index = reports[0].family_index;
rms.per_family_instance_index = reports[0].per_family_instance_index;
rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
rms.aggregate_name = "RMS";
rms.aggregate_unit = StatisticUnit::kPercentage;
rms.report_label = big_o.report_label;
rms.iterations = 0;
rms.repetition_index = Run::no_repetition_index;
rms.repetitions = reports[0].repetitions;
rms.threads = reports[0].threads;
rms.real_accumulated_time = result_real.rms / multiplier;
rms.cpu_accumulated_time = result_cpu.rms / multiplier;
rms.report_rms = true;

View File

@@ -31,7 +31,7 @@ std::vector<BenchmarkReporter::Run> ComputeBigO(
const std::vector<BenchmarkReporter::Run>& reports);
// This data structure will contain the result returned by MinimalLeastSq
// - coef : Estimated coeficient for the high-order term as
// - coef : Estimated coefficient for the high-order term as
// interpolated from data.
// - rms : Normalized Root Mean Squared Error.
// - complexity : Scalability form (e.g. oN, oNLogN). In case a scalability

View File

@@ -12,27 +12,28 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "complexity.h"
#include "counter.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
BENCHMARK_EXPORT
bool ConsoleReporter::ReportContext(const Context& context) {
name_field_width_ = context.name_field_width;
printed_header_ = false;
@@ -41,34 +42,40 @@ bool ConsoleReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
#ifdef BENCHMARK_OS_WINDOWS
if ((output_options_ & OO_Color) && &std::cout != &GetOutputStream()) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast< OutputOptions >(output_options_ & ~OO_Color);
if ((output_options_ & OO_Color)) {
auto stdOutBuf = std::cout.rdbuf();
auto outStreamBuf = GetOutputStream().rdbuf();
if (stdOutBuf != outStreamBuf) {
GetErrorStream()
<< "Color printing is only supported for stdout on windows."
" Disabling color printing\n";
output_options_ = static_cast<OutputOptions>(output_options_ & ~OO_Color);
}
}
#endif
return true;
}
BENCHMARK_EXPORT
void ConsoleReporter::PrintHeader(const Run& run) {
std::string str = FormatString("%-*s %13s %13s %10s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if(!run.counters.empty()) {
if(output_options_ & OO_Tabular) {
for(auto const& c : run.counters) {
std::string str =
FormatString("%-*s %13s %15s %12s", static_cast<int>(name_field_width_),
"Benchmark", "Time", "CPU", "Iterations");
if (!run.counters.empty()) {
if ((output_options_ & OO_Tabular) != 0) {
for (auto const& c : run.counters) {
str += FormatString(" %10s", c.first.c_str());
}
} else {
str += " UserCounters...";
}
}
str += "\n";
std::string line = std::string(str.length(), '-');
GetOutputStream() << line << "\n" << str << line << "\n";
GetOutputStream() << line << "\n" << str << "\n" << line << "\n";
}
BENCHMARK_EXPORT
void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
for (const auto& run : reports) {
// print the header:
@@ -76,7 +83,7 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
bool print_header = !printed_header_;
// --- or if the format is tabular and this run
// has different fields from the prev header
print_header |= (output_options_ & OO_Tabular) &&
print_header |= ((output_options_ & OO_Tabular) != 0) &&
(!internal::SameNames(run.counters, prev_counters_));
if (print_header) {
printed_header_ = true;
@@ -90,65 +97,108 @@ void ConsoleReporter::ReportRuns(const std::vector<Run>& reports) {
}
}
static void IgnoreColorPrint(std::ostream& out, LogColor, const char* fmt,
...) {
static void IgnoreColorPrint(std::ostream& out, LogColor /*unused*/,
const char* fmt, ...) {
va_list args;
va_start(args, fmt);
out << FormatString(fmt, args);
va_end(args);
}
static std::string FormatTime(double time) {
// For the time columns of the console printer 13 digits are reserved. One of
// them is a space and max two of them are the time unit (e.g ns). That puts
// us at 10 digits usable for the number.
// Align decimal places...
if (time < 1.0) {
return FormatString("%10.3f", time);
}
if (time < 10.0) {
return FormatString("%10.2f", time);
}
if (time < 100.0) {
return FormatString("%10.1f", time);
}
// Assuming the time is at max 9.9999e+99 and we have 10 digits for the
// number, we get 10-1(.)-1(e)-1(sign)-2(exponent) = 5 digits to print.
if (time > 9999999999 /*max 10 digit number*/) {
return FormatString("%1.4e", time);
}
return FormatString("%10.0f", time);
}
BENCHMARK_EXPORT
void ConsoleReporter::PrintRunData(const Run& result) {
typedef void(PrinterFn)(std::ostream&, LogColor, const char*, ...);
auto& Out = GetOutputStream();
PrinterFn* printer = (output_options_ & OO_Color) ?
(PrinterFn*)ColorPrintf : IgnoreColorPrint;
PrinterFn* printer = (output_options_ & OO_Color) != 0
? static_cast<PrinterFn*>(ColorPrintf)
: IgnoreColorPrint;
auto name_color =
(result.report_big_o || result.report_rms) ? COLOR_BLUE : COLOR_GREEN;
printer(Out, name_color, "%-*s ", name_field_width_,
result.benchmark_name().c_str());
if (result.error_occurred) {
if (internal::SkippedWithError == result.skipped) {
printer(Out, COLOR_RED, "ERROR OCCURRED: \'%s\'",
result.error_message.c_str());
result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
if (internal::SkippedWithMessage == result.skipped) {
printer(Out, COLOR_WHITE, "SKIPPED: \'%s\'", result.skip_message.c_str());
printer(Out, COLOR_DEFAULT, "\n");
return;
}
const double real_time = result.GetAdjustedRealTime();
const double cpu_time = result.GetAdjustedCPUTime();
const std::string real_time_str = FormatTime(real_time);
const std::string cpu_time_str = FormatTime(cpu_time);
if (result.report_big_o) {
std::string big_o = GetBigOString(result.complexity);
printer(Out, COLOR_YELLOW, "%10.2f %s %10.2f %s ", real_time, big_o.c_str(),
cpu_time, big_o.c_str());
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ", real_time,
big_o.c_str(), cpu_time, big_o.c_str());
} else if (result.report_rms) {
printer(Out, COLOR_YELLOW, "%10.0f %% %10.0f %% ", real_time * 100,
cpu_time * 100);
} else {
printer(Out, COLOR_YELLOW, "%10.0f %-4s %10.0f %-4s ", real_time * 100, "%",
cpu_time * 100, "%");
} else if (result.run_type != Run::RT_Aggregate ||
result.aggregate_unit == StatisticUnit::kTime) {
const char* timeLabel = GetTimeUnitString(result.time_unit);
printer(Out, COLOR_YELLOW, "%10.0f %s %10.0f %s ", real_time, timeLabel,
cpu_time, timeLabel);
printer(Out, COLOR_YELLOW, "%s %-4s %s %-4s ", real_time_str.c_str(),
timeLabel, cpu_time_str.c_str(), timeLabel);
} else {
assert(result.aggregate_unit == StatisticUnit::kPercentage);
printer(Out, COLOR_YELLOW, "%10.2f %-4s %10.2f %-4s ",
(100. * result.real_accumulated_time), "%",
(100. * result.cpu_accumulated_time), "%");
}
if (!result.report_big_o && !result.report_rms) {
printer(Out, COLOR_CYAN, "%10lld", result.iterations);
}
for (auto& c : result.counters) {
const std::size_t cNameLen = std::max(std::string::size_type(10),
c.first.length());
auto const& s = HumanReadableNumber(c.second.value, c.second.oneK);
if (output_options_ & OO_Tabular) {
if (c.second.flags & Counter::kIsRate) {
printer(Out, COLOR_DEFAULT, " %*s/s", cNameLen - 2, s.c_str());
} else {
printer(Out, COLOR_DEFAULT, " %*s", cNameLen, s.c_str());
}
for (const auto& c : result.counters) {
const std::size_t cNameLen =
std::max(static_cast<std::size_t>(10), c.first.length());
std::string s;
const char* unit = "";
if (result.run_type == Run::RT_Aggregate &&
result.aggregate_unit == StatisticUnit::kPercentage) {
s = StrFormat("%.2f", 100. * c.second.value);
unit = "%";
} else {
const char* unit = (c.second.flags & Counter::kIsRate) ? "/s" : "";
printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(),
s = HumanReadableNumber(c.second.value, c.second.oneK);
if ((c.second.flags & Counter::kIsRate) != 0) {
unit = (c.second.flags & Counter::kInvert) != 0 ? "s" : "/s";
}
}
if ((output_options_ & OO_Tabular) != 0) {
printer(Out, COLOR_DEFAULT, " %*s%s", cNameLen - strlen(unit), s.c_str(),
unit);
} else {
printer(Out, COLOR_DEFAULT, " %s=%s%s", c.first.c_str(), s.c_str(), unit);
}
}

View File

@@ -17,25 +17,30 @@
namespace benchmark {
namespace internal {
double Finish(Counter const& c, int64_t iterations, double cpu_time,
double Finish(Counter const& c, IterationCount iterations, double cpu_time,
double num_threads) {
double v = c.value;
if (c.flags & Counter::kIsRate) {
if ((c.flags & Counter::kIsRate) != 0) {
v /= cpu_time;
}
if (c.flags & Counter::kAvgThreads) {
if ((c.flags & Counter::kAvgThreads) != 0) {
v /= num_threads;
}
if (c.flags & Counter::kIsIterationInvariant) {
v *= iterations;
if ((c.flags & Counter::kIsIterationInvariant) != 0) {
v *= static_cast<double>(iterations);
}
if (c.flags & Counter::kAvgIterations) {
v /= iterations;
if ((c.flags & Counter::kAvgIterations) != 0) {
v /= static_cast<double>(iterations);
}
if ((c.flags & Counter::kInvert) != 0) { // Invert is *always* last.
v = 1.0 / v;
}
return v;
}
void Finish(UserCounters* l, int64_t iterations, double cpu_time, double num_threads) {
void Finish(UserCounters* l, IterationCount iterations, double cpu_time,
double num_threads) {
for (auto& c : *l) {
c.second.value = Finish(c.second, iterations, cpu_time, num_threads);
}
@@ -59,7 +64,9 @@ void Increment(UserCounters* l, UserCounters const& r) {
}
bool SameNames(UserCounters const& l, UserCounters const& r) {
if (&l == &r) return true;
if (&l == &r) {
return true;
}
if (l.size() != r.size()) {
return false;
}

View File

@@ -12,15 +12,21 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_COUNTER_H_
#define BENCHMARK_COUNTER_H_
#include "benchmark/benchmark.h"
namespace benchmark {
// these counter-related functions are hidden to reduce API surface.
namespace internal {
void Finish(UserCounters* l, int64_t iterations, double time, double num_threads);
void Finish(UserCounters* l, IterationCount iterations, double time,
double num_threads);
void Increment(UserCounters* l, UserCounters const& r);
bool SameNames(UserCounters const& l, UserCounters const& r);
} // end namespace internal
} // end namespace benchmark
#endif // BENCHMARK_COUNTER_H_

View File

@@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cstdint>
#include <iostream>
@@ -22,7 +19,9 @@
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
@@ -37,11 +36,29 @@ std::vector<std::string> elements = {
"error_occurred", "error_message"};
} // namespace
std::string CsvEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size() + 2);
for (char c : s) {
switch (c) {
case '"':
tmp += "\"\"";
break;
default:
tmp += c;
break;
}
}
return '"' + tmp + '"';
}
BENCHMARK_EXPORT
bool CSVReporter::ReportContext(const Context& context) {
PrintBasicContext(&GetErrorStream(), context);
return true;
}
BENCHMARK_EXPORT
void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
std::ostream& Out = GetOutputStream();
@@ -49,8 +66,10 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// save the names of all the user counters
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
}
user_counter_names_.insert(cnt.first);
}
}
@@ -58,7 +77,9 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// print the header
for (auto B = elements.begin(); B != elements.end();) {
Out << *B++;
if (B != elements.end()) Out << ",";
if (B != elements.end()) {
Out << ",";
}
}
for (auto B = user_counter_names_.begin();
B != user_counter_names_.end();) {
@@ -71,9 +92,12 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
// check that all the current counters are saved in the name set
for (const auto& run : reports) {
for (const auto& cnt : run.counters) {
if (cnt.first == "bytes_per_second" || cnt.first == "items_per_second")
if (cnt.first == "bytes_per_second" ||
cnt.first == "items_per_second") {
continue;
CHECK(user_counter_names_.find(cnt.first) != user_counter_names_.end())
}
BM_CHECK(user_counter_names_.find(cnt.first) !=
user_counter_names_.end())
<< "All counters must be present in each run. "
<< "Counter named \"" << cnt.first
<< "\" was not in a run after being added to the header";
@@ -87,20 +111,14 @@ void CSVReporter::ReportRuns(const std::vector<Run>& reports) {
}
}
BENCHMARK_EXPORT
void CSVReporter::PrintRunData(const Run& run) {
std::ostream& Out = GetOutputStream();
// Field with embedded double-quote characters must be doubled and the field
// delimited with double-quotes.
std::string name = run.benchmark_name();
ReplaceAll(&name, "\"", "\"\"");
Out << '"' << name << "\",";
if (run.error_occurred) {
Out << CsvEscape(run.benchmark_name()) << ",";
if (run.skipped != 0u) {
Out << std::string(elements.size() - 3, ',');
Out << "true,";
std::string msg = run.error_message;
ReplaceAll(&msg, "\"", "\"\"");
Out << '"' << msg << "\"\n";
Out << std::boolalpha << (internal::SkippedWithError == run.skipped) << ",";
Out << CsvEscape(run.skip_message) << "\n";
return;
}
@@ -110,13 +128,21 @@ void CSVReporter::PrintRunData(const Run& run) {
}
Out << ",";
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";
if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
Out << run.GetAdjustedRealTime() << ",";
Out << run.GetAdjustedCPUTime() << ",";
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
Out << run.real_accumulated_time << ",";
Out << run.cpu_accumulated_time << ",";
}
// Do not print timeLabel on bigO and RMS report
if (run.report_big_o) {
Out << GetBigOString(run.complexity);
} else if (!run.report_rms) {
} else if (!run.report_rms &&
run.aggregate_unit != StatisticUnit::kPercentage) {
Out << GetTimeUnitString(run.time_unit);
}
Out << ",";
@@ -130,11 +156,7 @@ void CSVReporter::PrintRunData(const Run& run) {
}
Out << ",";
if (!run.report_label.empty()) {
// Field with embedded double-quote characters must be doubled and the field
// delimited with double-quotes.
std::string label = run.report_label;
ReplaceAll(&label, "\"", "\"\"");
Out << "\"" << label << "\"";
Out << CsvEscape(run.report_label);
}
Out << ",,"; // for error_occurred and error_message

View File

@@ -36,7 +36,8 @@
// declarations of some other intrinsics, breaking compilation.
// Therefore, we simply declare __rdtsc ourselves. See also
// http://connect.microsoft.com/VisualStudio/feedback/details/262047
#if defined(COMPILER_MSVC) && !defined(_M_IX86)
#if defined(COMPILER_MSVC) && !defined(_M_IX86) && !defined(_M_ARM64) && \
!defined(_M_ARM64EC)
extern "C" uint64_t __rdtsc();
#pragma intrinsic(__rdtsc)
#endif
@@ -69,7 +70,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// frequency scaling). Also note that when the Mac sleeps, this
// counter pauses; it does not continue counting, nor does it
// reset to zero.
return mach_absolute_time();
return static_cast<int64_t>(mach_absolute_time());
#elif defined(BENCHMARK_OS_EMSCRIPTEN)
// this goes above x86-specific code because old versions of Emscripten
// define __x86_64__, although they have nothing to do with it.
@@ -81,16 +82,24 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
#elif defined(__x86_64__) || defined(__amd64__)
uint64_t low, high;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return (high << 32) | low;
return static_cast<int64_t>((high << 32) | low);
#elif defined(__powerpc__) || defined(__ppc__)
// This returns a time-base, which is not always precisely a cycle-count.
int64_t tbl, tbu0, tbu1;
asm("mftbu %0" : "=r"(tbu0));
asm("mftb %0" : "=r"(tbl));
asm("mftbu %0" : "=r"(tbu1));
tbl &= -static_cast<int64_t>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
return (tbu1 << 32) | tbl;
#if defined(__powerpc64__) || defined(__ppc64__)
int64_t tb;
asm volatile("mfspr %0, 268" : "=r"(tb));
return tb;
#else
uint32_t tbl, tbu0, tbu1;
asm volatile(
"mftbu %0\n"
"mftb %1\n"
"mftbu %2"
: "=r"(tbu0), "=r"(tbl), "=r"(tbu1));
tbl &= -static_cast<int32_t>(tbu0 == tbu1);
// high 32 bits in tbu1; low 32 bits in tbl (tbu0 is no longer needed)
return (static_cast<uint64_t>(tbu1) << 32) | tbl;
#endif
#elif defined(__sparc__)
int64_t tick;
asm(".byte 0x83, 0x41, 0x00, 0x00");
@@ -106,6 +115,12 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// when I know it will work. Otherwise, I'll use __rdtsc and hope
// the code is being compiled with a non-ancient compiler.
_asm rdtsc
#elif defined(COMPILER_MSVC) && (defined(_M_ARM64) || defined(_M_ARM64EC))
// See // https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
// and https://reviews.llvm.org/D53115
int64_t virtual_timer_value;
virtual_timer_value = _ReadStatusReg(ARM64_CNTVCT);
return virtual_timer_value;
#elif defined(COMPILER_MSVC)
return __rdtsc();
#elif defined(BENCHMARK_OS_NACL)
@@ -118,7 +133,7 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
// Native Client does not provide any API to access cycle counter.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because is provides nanosecond resolution (which is noticable at
// because is provides nanosecond resolution (which is noticeable at
// least for PNaCl modules running on x86 Mac & Linux).
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
@@ -153,21 +168,84 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__mips__)
#elif defined(__mips__) || defined(__m68k__)
// mips apparently only allows rdtsc for superusers, so we fall
// back to gettimeofday. It's possible clock_gettime would be better.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__loongarch__) || defined(__csky__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__s390__) // Covers both s390 and s390x.
// Return the CPU clock.
uint64_t tsc;
asm("stck %0" : "=Q"(tsc) : : "cc");
return tsc;
#if defined(BENCHMARK_OS_ZOS)
// z/OS HLASM syntax.
asm(" stck %0" : "=m"(tsc) : : "cc");
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is available.
// Linux on Z syntax.
asm("stck %0" : "=Q"(tsc) : : "cc");
#endif
return tsc;
#elif defined(__riscv) // RISC-V
// Use RDTIME (and RDTIMEH on riscv32).
// RDCYCLE is a privileged instruction since Linux 6.6.
#if __riscv_xlen == 32
uint32_t cycles_lo, cycles_hi0, cycles_hi1;
// This asm also includes the PowerPC overflow handling strategy, as above.
// Implemented in assembly because Clang insisted on branching.
asm volatile(
"rdtimeh %0\n"
"rdtime %1\n"
"rdtimeh %2\n"
"sub %0, %0, %2\n"
"seqz %0, %0\n"
"sub %0, zero, %0\n"
"and %1, %1, %0\n"
: "=r"(cycles_hi0), "=r"(cycles_lo), "=r"(cycles_hi1));
return static_cast<int64_t>((static_cast<uint64_t>(cycles_hi1) << 32) |
cycles_lo);
#else
uint64_t cycles;
asm volatile("rdtime %0" : "=r"(cycles));
return static_cast<int64_t>(cycles);
#endif
#elif defined(__e2k__) || defined(__elbrus__)
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hexagon__)
uint64_t pcycle;
asm volatile("%0 = C15:14" : "=r"(pcycle));
return static_cast<int64_t>(pcycle);
#elif defined(__alpha__)
// Alpha has a cycle counter, the PCC register, but it is an unsigned 32-bit
// integer and thus wraps every ~4s, making using it for tick counts
// unreliable beyond this time range. The real-time clock is low-precision,
// roughtly ~1ms, but it is the only option that can reasonable count
// indefinitely.
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#elif defined(__hppa__) || defined(__linux__)
// Fallback for all other architectures with a recent Linux kernel, e.g.:
// HP PA-RISC provides a user-readable clock counter (cr16), but
// it's not syncronized across CPUs and only 32-bit wide when programs
// are built as 32-bit binaries.
// Same for SH-4 and possibly others.
// Use clock_gettime(CLOCK_MONOTONIC, ...) instead of gettimeofday
// because is provides nanosecond resolution.
// Initialize to always return 0 if clock_gettime fails.
struct timespec ts = {0, 0};
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<int64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
// The soft failover to a generic implementation is automatic only for ARM.
// For other platforms the developer is expected to make an attempt to create
// a fast implementation and use generic version if nothing better is
// available.
#error You need to define CycleTimer for your OS and CPU
#endif
}

View File

@@ -1,8 +1,6 @@
#ifndef BENCHMARK_INTERNAL_MACROS_H_
#define BENCHMARK_INTERNAL_MACROS_H_
#include "benchmark/benchmark.h"
/* Needed to detect STL */
#include <cstdlib>
@@ -40,6 +38,19 @@
#define BENCHMARK_OS_CYGWIN 1
#elif defined(_WIN32)
#define BENCHMARK_OS_WINDOWS 1
// WINAPI_FAMILY_PARTITION is defined in winapifamily.h.
// We include windows.h which implicitly includes winapifamily.h for compatibility.
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#if defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#define BENCHMARK_OS_WINDOWS_WIN32 1
#elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
#define BENCHMARK_OS_WINDOWS_RT 1
#endif
#endif
#if defined(__MINGW32__)
#define BENCHMARK_OS_MINGW 1
#endif
@@ -58,6 +69,8 @@
#define BENCHMARK_OS_NETBSD 1
#elif defined(__OpenBSD__)
#define BENCHMARK_OS_OPENBSD 1
#elif defined(__DragonFly__)
#define BENCHMARK_OS_DRAGONFLY 1
#elif defined(__linux__)
#define BENCHMARK_OS_LINUX 1
#elif defined(__native_client__)
@@ -70,6 +83,12 @@
#define BENCHMARK_OS_FUCHSIA 1
#elif defined (__SVR4) && defined (__sun)
#define BENCHMARK_OS_SOLARIS 1
#elif defined(__QNX__)
#define BENCHMARK_OS_QNX 1
#elif defined(__MVS__)
#define BENCHMARK_OS_ZOS 1
#elif defined(__hexagon__)
#define BENCHMARK_OS_QURT 1
#endif
#if defined(__ANDROID__) && defined(__GLIBCXX__)

View File

@@ -12,10 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "complexity.h"
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iomanip> // for setprecision
#include <iostream>
@@ -24,43 +22,92 @@
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "complexity.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
namespace {
std::string StrEscape(const std::string& s) {
std::string tmp;
tmp.reserve(s.size());
for (char c : s) {
switch (c) {
case '\b':
tmp += "\\b";
break;
case '\f':
tmp += "\\f";
break;
case '\n':
tmp += "\\n";
break;
case '\r':
tmp += "\\r";
break;
case '\t':
tmp += "\\t";
break;
case '\\':
tmp += "\\\\";
break;
case '"':
tmp += "\\\"";
break;
default:
tmp += c;
break;
}
}
return tmp;
}
std::string FormatKV(std::string const& key, std::string const& value) {
return StrFormat("\"%s\": \"%s\"", key.c_str(), value.c_str());
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, const char* value) {
return StrFormat("\"%s\": \"%s\"", key.c_str(), value);
return StrFormat("\"%s\": \"%s\"", StrEscape(key).c_str(),
StrEscape(value).c_str());
}
std::string FormatKV(std::string const& key, bool value) {
return StrFormat("\"%s\": %s", key.c_str(), value ? "true" : "false");
return StrFormat("\"%s\": %s", StrEscape(key).c_str(),
value ? "true" : "false");
}
std::string FormatKV(std::string const& key, int64_t value) {
std::stringstream ss;
ss << '"' << key << "\": " << value;
ss << '"' << StrEscape(key) << "\": " << value;
return ss.str();
}
std::string FormatKV(std::string const& key, int value) {
return FormatKV(key, static_cast<int64_t>(value));
}
std::string FormatKV(std::string const& key, double value) {
std::stringstream ss;
ss << '"' << key << "\": ";
ss << '"' << StrEscape(key) << "\": ";
const auto max_digits10 = std::numeric_limits<decltype(value)>::max_digits10;
const auto max_fractional_digits10 = max_digits10 - 1;
ss << std::scientific << std::setprecision(max_fractional_digits10) << value;
if (std::isnan(value)) {
ss << (value < 0 ? "-" : "") << "NaN";
} else if (std::isinf(value)) {
ss << (value < 0 ? "-" : "") << "Infinity";
} else {
const auto max_digits10 =
std::numeric_limits<decltype(value)>::max_digits10;
const auto max_fractional_digits10 = max_digits10 - 1;
ss << std::scientific << std::setprecision(max_fractional_digits10)
<< value;
}
return ss.str();
}
int64_t RoundDouble(double v) { return static_cast<int64_t>(v + 0.5); }
int64_t RoundDouble(double v) { return std::lround(v); }
} // end namespace
@@ -77,13 +124,10 @@ bool JSONReporter::ReportContext(const Context& context) {
std::string walltime_value = LocalDateTimeString();
out << indent << FormatKV("date", walltime_value) << ",\n";
if (Context::executable_name) {
// windows uses backslash for its path separator,
// which must be escaped in JSON otherwise it blows up conforming JSON
// decoders
std::string executable_name = Context::executable_name;
ReplaceAll(&executable_name, "\\", "\\\\");
out << indent << FormatKV("executable", executable_name) << ",\n";
out << indent << FormatKV("host_name", context.sys_info.name) << ",\n";
if (Context::executable_name != nullptr) {
out << indent << FormatKV("executable", Context::executable_name) << ",\n";
}
CPUInfo const& info = context.cpu_info;
@@ -93,25 +137,39 @@ bool JSONReporter::ReportContext(const Context& context) {
<< FormatKV("mhz_per_cpu",
RoundDouble(info.cycles_per_second / 1000000.0))
<< ",\n";
out << indent << FormatKV("cpu_scaling_enabled", info.scaling_enabled)
<< ",\n";
if (CPUInfo::Scaling::UNKNOWN != info.scaling) {
out << indent
<< FormatKV("cpu_scaling_enabled",
info.scaling == CPUInfo::Scaling::ENABLED)
<< ",\n";
}
const SystemInfo& sysinfo = context.sys_info;
if (SystemInfo::ASLR::UNKNOWN != sysinfo.ASLRStatus) {
out << indent
<< FormatKV("aslr_enabled",
sysinfo.ASLRStatus == SystemInfo::ASLR::ENABLED)
<< ",\n";
}
out << indent << "\"caches\": [\n";
indent = std::string(6, ' ');
std::string cache_indent(8, ' ');
for (size_t i = 0; i < info.caches.size(); ++i) {
auto& CI = info.caches[i];
const auto& CI = info.caches[i];
out << indent << "{\n";
out << cache_indent << FormatKV("type", CI.type) << ",\n";
out << cache_indent << FormatKV("level", static_cast<int64_t>(CI.level))
<< ",\n";
out << cache_indent
<< FormatKV("size", static_cast<int64_t>(CI.size) * 1000u) << ",\n";
out << cache_indent << FormatKV("size", static_cast<int64_t>(CI.size))
<< ",\n";
out << cache_indent
<< FormatKV("num_sharing", static_cast<int64_t>(CI.num_sharing))
<< "\n";
out << indent << "}";
if (i != info.caches.size() - 1) out << ",";
if (i != info.caches.size() - 1) {
out << ",";
}
out << "\n";
}
indent = std::string(4, ' ');
@@ -119,16 +177,37 @@ bool JSONReporter::ReportContext(const Context& context) {
out << indent << "\"load_avg\": [";
for (auto it = info.load_avg.begin(); it != info.load_avg.end();) {
out << *it++;
if (it != info.load_avg.end()) out << ",";
if (it != info.load_avg.end()) {
out << ",";
}
}
out << "],\n";
out << indent << FormatKV("library_version", GetBenchmarkVersion());
out << ",\n";
#if defined(NDEBUG)
const char build_type[] = "release";
#else
const char build_type[] = "debug";
#endif
out << indent << FormatKV("library_build_type", build_type) << "\n";
out << indent << FormatKV("library_build_type", build_type);
out << ",\n";
// NOTE: our json schema is not strictly tied to the library version!
out << indent << FormatKV("json_schema_version", 1);
std::map<std::string, std::string>* global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto& kv : *global_context) {
out << ",\n";
out << indent << FormatKV(kv.first, kv.second);
}
}
out << "\n";
// Close context block and open the list of benchmarks.
out << inner_indent << "},\n";
out << inner_indent << "\"benchmarks\": [\n";
@@ -166,7 +245,11 @@ void JSONReporter::PrintRunData(Run const& run) {
std::string indent(6, ' ');
std::ostream& out = GetOutputStream();
out << indent << FormatKV("name", run.benchmark_name()) << ",\n";
out << indent << FormatKV("run_name", run.run_name) << ",\n";
out << indent << FormatKV("family_index", run.family_index) << ",\n";
out << indent
<< FormatKV("per_family_instance_index", run.per_family_instance_index)
<< ",\n";
out << indent << FormatKV("run_name", run.run_name.str()) << ",\n";
out << indent << FormatKV("run_type", [&run]() -> const char* {
switch (run.run_type) {
case BenchmarkReporter::Run::RT_Iteration:
@@ -176,17 +259,44 @@ void JSONReporter::PrintRunData(Run const& run) {
}
BENCHMARK_UNREACHABLE();
}()) << ",\n";
out << indent << FormatKV("repetitions", run.repetitions) << ",\n";
if (run.run_type != BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("repetition_index", run.repetition_index)
<< ",\n";
}
out << indent << FormatKV("threads", run.threads) << ",\n";
if (run.run_type == BenchmarkReporter::Run::RT_Aggregate) {
out << indent << FormatKV("aggregate_name", run.aggregate_name) << ",\n";
out << indent << FormatKV("aggregate_unit", [&run]() -> const char* {
switch (run.aggregate_unit) {
case StatisticUnit::kTime:
return "time";
case StatisticUnit::kPercentage:
return "percentage";
}
BENCHMARK_UNREACHABLE();
}()) << ",\n";
}
if (run.error_occurred) {
out << indent << FormatKV("error_occurred", run.error_occurred) << ",\n";
out << indent << FormatKV("error_message", run.error_message) << ",\n";
if (internal::SkippedWithError == run.skipped) {
out << indent << FormatKV("error_occurred", true) << ",\n";
out << indent << FormatKV("error_message", run.skip_message) << ",\n";
} else if (internal::SkippedWithMessage == run.skipped) {
out << indent << FormatKV("skipped", true) << ",\n";
out << indent << FormatKV("skip_message", run.skip_message) << ",\n";
}
if (!run.report_big_o && !run.report_rms) {
out << indent << FormatKV("iterations", run.iterations) << ",\n";
out << indent << FormatKV("real_time", run.GetAdjustedRealTime()) << ",\n";
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
if (run.run_type != Run::RT_Aggregate ||
run.aggregate_unit == StatisticUnit::kTime) {
out << indent << FormatKV("real_time", run.GetAdjustedRealTime())
<< ",\n";
out << indent << FormatKV("cpu_time", run.GetAdjustedCPUTime());
} else {
assert(run.aggregate_unit == StatisticUnit::kPercentage);
out << indent << FormatKV("real_time", run.real_accumulated_time)
<< ",\n";
out << indent << FormatKV("cpu_time", run.cpu_accumulated_time);
}
out << ",\n"
<< indent << FormatKV("time_unit", GetTimeUnitString(run.time_unit));
} else if (run.report_big_o) {
@@ -200,13 +310,26 @@ void JSONReporter::PrintRunData(Run const& run) {
out << indent << FormatKV("rms", run.GetAdjustedCPUTime());
}
for (auto& c : run.counters) {
for (const auto& c : run.counters) {
out << ",\n" << indent << FormatKV(c.first, c.second);
}
if (run.has_memory_result) {
if (run.memory_result.memory_iterations > 0) {
const auto& memory_result = run.memory_result;
out << ",\n" << indent << FormatKV("allocs_per_iter", run.allocs_per_iter);
out << ",\n" << indent << FormatKV("max_bytes_used", run.max_bytes_used);
out << ",\n"
<< indent << FormatKV("max_bytes_used", memory_result.max_bytes_used);
auto report_if_present = [&out, &indent](const std::string& label,
int64_t val) {
if (val != MemoryManager::TombstoneValue) {
out << ",\n" << indent << FormatKV(label, val);
}
};
report_if_present("total_allocated_bytes",
memory_result.total_allocated_bytes);
report_if_present("net_heap_growth", memory_result.net_heap_growth);
}
if (!run.report_label.empty()) {

View File

@@ -4,8 +4,6 @@
#include <iostream>
#include <ostream>
#include "benchmark/benchmark.h"
namespace benchmark {
namespace internal {
@@ -23,7 +21,11 @@ class LogType {
private:
LogType(std::ostream* out) : out_(out) {}
std::ostream* out_;
BENCHMARK_DISALLOW_COPY_AND_ASSIGN(LogType);
// NOTE: we could use BENCHMARK_DISALLOW_COPY_AND_ASSIGN but we shouldn't have
// a dependency on benchmark.h from here.
LogType(const LogType&) = delete;
LogType& operator=(const LogType&) = delete;
};
template <class Tp>
@@ -47,13 +49,13 @@ inline int& LogLevel() {
}
inline LogType& GetNullLogInstance() {
static LogType log(nullptr);
return log;
static LogType null_log(static_cast<std::ostream*>(nullptr));
return null_log;
}
inline LogType& GetErrorLogInstance() {
static LogType log(&std::clog);
return log;
static LogType error_log(&std::clog);
return error_log;
}
inline LogType& GetLogInstanceForLevel(int level) {
@@ -67,7 +69,7 @@ inline LogType& GetLogInstanceForLevel(int level) {
} // end namespace benchmark
// clang-format off
#define VLOG(x) \
#define BM_VLOG(x) \
(::benchmark::internal::GetLogInstanceForLevel(x) << "-- LOG(" << x << "):" \
" ")
// clang-format on

View File

@@ -9,60 +9,60 @@
// Enable thread safety attributes only with clang.
// The attributes can be safely erased when compiling with other compilers.
#if defined(HAVE_THREAD_SAFETY_ATTRIBUTES)
#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
#define THREAD_ANNOTATION_ATTRIBUTE_(x) __attribute__((x))
#else
#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
#define THREAD_ANNOTATION_ATTRIBUTE_(x) // no-op
#endif
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(capability(x))
#define CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(capability(x))
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
#define SCOPED_CAPABILITY THREAD_ANNOTATION_ATTRIBUTE_(scoped_lockable)
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(guarded_by(x))
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE_(pt_guarded_by(x))
#define ACQUIRED_BEFORE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquired_before(__VA_ARGS__))
#define ACQUIRED_AFTER(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquired_after(__VA_ARGS__))
#define REQUIRES(...) \
THREAD_ANNOTATION_ATTRIBUTE__(requires_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(requires_capability(__VA_ARGS__))
#define REQUIRES_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(requires_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(requires_shared_capability(__VA_ARGS__))
#define ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquire_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquire_capability(__VA_ARGS__))
#define ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(acquire_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(acquire_shared_capability(__VA_ARGS__))
#define RELEASE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(release_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(release_capability(__VA_ARGS__))
#define RELEASE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(release_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(release_shared_capability(__VA_ARGS__))
#define TRY_ACQUIRE(...) \
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_capability(__VA_ARGS__))
#define TRY_ACQUIRE_SHARED(...) \
THREAD_ANNOTATION_ATTRIBUTE__(try_acquire_shared_capability(__VA_ARGS__))
THREAD_ANNOTATION_ATTRIBUTE_(try_acquire_shared_capability(__VA_ARGS__))
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
#define EXCLUDES(...) THREAD_ANNOTATION_ATTRIBUTE_(locks_excluded(__VA_ARGS__))
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(assert_capability(x))
#define ASSERT_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(assert_capability(x))
#define ASSERT_SHARED_CAPABILITY(x) \
THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_capability(x))
THREAD_ANNOTATION_ATTRIBUTE_(assert_shared_capability(x))
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
#define RETURN_CAPABILITY(x) THREAD_ANNOTATION_ATTRIBUTE_(lock_returned(x))
#define NO_THREAD_SAFETY_ANALYSIS \
THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis)
THREAD_ANNOTATION_ATTRIBUTE_(no_thread_safety_analysis)
namespace benchmark {
@@ -71,7 +71,7 @@ typedef std::condition_variable Condition;
// NOTE: Wrappers for std::mutex and std::unique_lock are provided so that
// we can annotate them with thread safety attributes and use the
// -Wthread-safety warning with clang. The standard library types cannot be
// used directly because they do not provided the required annotations.
// used directly because they do not provide the required annotations.
class CAPABILITY("mutex") Mutex {
public:
Mutex() {}
@@ -130,7 +130,7 @@ class Barrier {
// entered the barrier. Returns iff this is the last thread to
// enter the barrier.
bool createBarrier(MutexLock& ml) REQUIRES(lock_) {
CHECK_LT(entered_, running_threads_);
BM_CHECK_LT(entered_, running_threads_);
entered_++;
if (entered_ < running_threads_) {
// Wait for all threads to enter

View File

@@ -0,0 +1,282 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "perf_counters.h"
#include <cstring>
#include <memory>
#include <vector>
#if defined HAVE_LIBPFM
#include "perfmon/pfmlib.h"
#include "perfmon/pfmlib_perf_event.h"
#endif
namespace benchmark {
namespace internal {
#if defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>& leaders) {
// Create a pointer for multiple reads
const size_t bufsize = values_.size() * sizeof(values_[0]);
char* ptr = reinterpret_cast<char*>(values_.data());
size_t size = bufsize;
for (int lead : leaders) {
auto read_bytes = ::read(lead, ptr, size);
if (read_bytes >= ssize_t(sizeof(uint64_t))) {
// Actual data bytes are all bytes minus initial padding
std::size_t data_bytes =
static_cast<std::size_t>(read_bytes) - sizeof(uint64_t);
// This should be very cheap since it's in hot cache
std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
// Increment our counters
ptr += data_bytes;
size -= data_bytes;
} else {
int err = errno;
GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
<< " " << ::strerror(err) << "\n";
return 0;
}
}
return (bufsize - size) / sizeof(uint64_t);
}
const bool PerfCounters::kSupported = true;
// Initializes libpfm only on the first call. Returns whether that single
// initialization was successful.
bool PerfCounters::Initialize() {
// Function-scope static gets initialized only once on first call.
static const bool success = []() {
return pfm_initialize() == PFM_SUCCESS;
}();
return success;
}
bool PerfCounters::IsCounterSupported(const std::string& name) {
Initialize();
perf_event_attr_t attr;
std::memset(&attr, 0, sizeof(attr));
pfm_perf_encode_arg_t arg;
std::memset(&arg, 0, sizeof(arg));
arg.attr = &attr;
const int mode = PFM_PLM3; // user mode only
int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
&arg);
return (ret == PFM_SUCCESS);
}
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
Initialize();
}
// Valid counters will populate these arrays but we start empty
std::vector<std::string> valid_names;
std::vector<int> counter_ids;
std::vector<int> leader_ids;
// Resize to the maximum possible
valid_names.reserve(counter_names.size());
counter_ids.reserve(counter_names.size());
const int kCounterMode = PFM_PLM3; // user mode only
// Group leads will be assigned on demand. The idea is that once we cannot
// create a counter descriptor, the reason is that this group has maxed out
// so we set the group_id again to -1 and retry - giving the algorithm a
// chance to create a new group leader to hold the next set of counters.
int group_id = -1;
// Loop through all performance counters
for (size_t i = 0; i < counter_names.size(); ++i) {
// we are about to push into the valid names vector
// check if we did not reach the maximum
if (valid_names.size() == PerfCounterValues::kMaxCounters) {
// Log a message if we maxed out and stop adding
GetErrorLogInstance()
<< counter_names.size() << " counters were requested. The maximum is "
<< PerfCounterValues::kMaxCounters << " and " << valid_names.size()
<< " were already added. All remaining counters will be ignored\n";
// stop the loop and return what we have already
break;
}
// Check if this name is empty
const auto& name = counter_names[i];
if (name.empty()) {
GetErrorLogInstance()
<< "A performance counter name was the empty string\n";
continue;
}
// Here first means first in group, ie the group leader
const bool is_first = (group_id < 0);
// This struct will be populated by libpfm from the counter string
// and then fed into the syscall perf_event_open
struct perf_event_attr attr {};
attr.size = sizeof(attr);
// This is the input struct to libpfm.
pfm_perf_encode_arg_t arg{};
arg.attr = &attr;
const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
PFM_OS_PERF_EVENT, &arg);
if (pfm_get != PFM_SUCCESS) {
GetErrorLogInstance()
<< "Unknown performance counter name: " << name << "\n";
continue;
}
// We then proceed to populate the remaining fields in our attribute struct
// Note: the man page for perf_event_create suggests inherit = true and
// read_format = PERF_FORMAT_GROUP don't work together, but that's not the
// case.
attr.disabled = is_first;
attr.inherit = true;
attr.pinned = is_first;
attr.exclude_kernel = true;
attr.exclude_user = false;
attr.exclude_hv = true;
// Read all counters in a group in one read.
attr.read_format = PERF_FORMAT_GROUP; //| PERF_FORMAT_TOTAL_TIME_ENABLED |
// PERF_FORMAT_TOTAL_TIME_RUNNING;
int id = -1;
while (id < 0) {
static constexpr size_t kNrOfSyscallRetries = 5;
// Retry syscall as it was interrupted often (b/64774091).
for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
++num_retries) {
id = perf_event_open(&attr, 0, -1, group_id, 0);
if (id >= 0 || errno != EINTR) {
break;
}
}
if (id < 0) {
// If the file descriptor is negative we might have reached a limit
// in the current group. Set the group_id to -1 and retry
if (group_id >= 0) {
// Create a new group
group_id = -1;
} else {
// At this point we have already retried to set a new group id and
// failed. We then give up.
break;
}
}
}
// We failed to get a new file descriptor. We might have reached a hard
// hardware limit that cannot be resolved even with group multiplexing
if (id < 0) {
GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
"for performance counter "
<< name << ". Ignoring\n";
// We give up on this counter but try to keep going
// as the others would be fine
continue;
}
if (group_id < 0) {
// This is a leader, store and assign it to the current file descriptor
leader_ids.push_back(id);
group_id = id;
}
// This is a valid counter, add it to our descriptor's list
counter_ids.push_back(id);
valid_names.push_back(name);
}
// Loop through all group leaders activating them
// There is another option of starting ALL counters in a process but
// that would be far reaching an intrusion. If the user is using PMCs
// by themselves then this would have a side effect on them. It is
// friendlier to loop through all groups individually.
for (int lead : leader_ids) {
if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
// This should never happen but if it does, we give up on the
// entire batch as recovery would be a mess.
GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
"Claring out all counters.\n";
// Close all performance counters
for (int id : counter_ids) {
::close(id);
}
// Return an empty object so our internal state is still good and
// the process can continue normally without impact
return NoCounters();
}
}
return PerfCounters(std::move(valid_names), std::move(counter_ids),
std::move(leader_ids));
}
void PerfCounters::CloseCounters() const {
if (counter_ids_.empty()) {
return;
}
for (int lead : leader_ids_) {
ioctl(lead, PERF_EVENT_IOC_DISABLE);
}
for (int fd : counter_ids_) {
close(fd);
}
}
#else // defined HAVE_LIBPFM
size_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
const bool PerfCounters::kSupported = false;
bool PerfCounters::Initialize() { return false; }
bool PerfCounters::IsCounterSupported(const std::string&) { return false; }
PerfCounters PerfCounters::Create(
const std::vector<std::string>& counter_names) {
if (!counter_names.empty()) {
GetErrorLogInstance() << "Performance counters not supported.\n";
}
return NoCounters();
}
void PerfCounters::CloseCounters() const {}
#endif // defined HAVE_LIBPFM
PerfCountersMeasurement::PerfCountersMeasurement(
const std::vector<std::string>& counter_names)
: start_values_(counter_names.size()), end_values_(counter_names.size()) {
counters_ = PerfCounters::Create(counter_names);
}
PerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
if (this != &other) {
CloseCounters();
counter_ids_ = std::move(other.counter_ids_);
leader_ids_ = std::move(other.leader_ids_);
counter_names_ = std::move(other.counter_names_);
}
return *this;
}
} // namespace internal
} // namespace benchmark

View File

@@ -0,0 +1,200 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef BENCHMARK_PERF_COUNTERS_H
#define BENCHMARK_PERF_COUNTERS_H
#include <array>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "log.h"
#include "mutex.h"
#ifndef BENCHMARK_OS_WINDOWS
#include <unistd.h>
#endif
#if defined(_MSC_VER)
#pragma warning(push)
// C4251: <symbol> needs to have dll-interface to be used by clients of class
#pragma warning(disable : 4251)
#endif
namespace benchmark {
namespace internal {
// Typically, we can only read a small number of counters. There is also a
// padding preceding counter values, when reading multiple counters with one
// syscall (which is desirable). PerfCounterValues abstracts these details.
// The implementation ensures the storage is inlined, and allows 0-based
// indexing into the counter values.
// The object is used in conjunction with a PerfCounters object, by passing it
// to Snapshot(). The Read() method relocates individual reads, discarding
// the initial padding from each group leader in the values buffer such that
// all user accesses through the [] operator are correct.
class BENCHMARK_EXPORT PerfCounterValues {
public:
explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
BM_CHECK_LE(nr_counters_, kMaxCounters);
}
// We are reading correctly now so the values don't need to skip padding
uint64_t operator[](size_t pos) const { return values_[pos]; }
// Increased the maximum to 32 only since the buffer
// is std::array<> backed
static constexpr size_t kMaxCounters = 32;
private:
friend class PerfCounters;
// Get the byte buffer in which perf counters can be captured.
// This is used by PerfCounters::Read
std::pair<char*, size_t> get_data_buffer() {
return {reinterpret_cast<char*>(values_.data()),
sizeof(uint64_t) * (kPadding + nr_counters_)};
}
// This reading is complex and as the goal of this class is to
// abstract away the intrincacies of the reading process, this is
// a better place for it
size_t Read(const std::vector<int>& leaders);
// Move the padding to 2 due to the reading algorithm (1st padding plus a
// current read padding)
static constexpr size_t kPadding = 2;
std::array<uint64_t, kPadding + kMaxCounters> values_;
const size_t nr_counters_;
};
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling read(). PMU counter collection is enabled from the time create() is
// called, to obtain the object, until the object's destructor is called.
class BENCHMARK_EXPORT PerfCounters final {
public:
// True iff this platform supports performance counters.
static const bool kSupported;
// Returns an empty object
static PerfCounters NoCounters() { return PerfCounters(); }
~PerfCounters() { CloseCounters(); }
PerfCounters() = default;
PerfCounters(PerfCounters&&) = default;
PerfCounters(const PerfCounters&) = delete;
PerfCounters& operator=(PerfCounters&&) noexcept;
PerfCounters& operator=(const PerfCounters&) = delete;
// Platform-specific implementations may choose to do some library
// initialization here.
static bool Initialize();
// Check if the given counter is supported, if the app wants to
// check before passing
static bool IsCounterSupported(const std::string& name);
// Return a PerfCounters object ready to read the counters with the names
// specified. The values are user-mode only. The counter name format is
// implementation and OS specific.
// In case of failure, this method will in the worst case return an
// empty object whose state will still be valid.
static PerfCounters Create(const std::vector<std::string>& counter_names);
// Take a snapshot of the current value of the counters into the provided
// valid PerfCounterValues storage. The values are populated such that:
// names()[i]'s value is (*values)[i]
BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
#ifndef BENCHMARK_OS_WINDOWS
assert(values != nullptr);
return values->Read(leader_ids_) == counter_ids_.size();
#else
(void)values;
return false;
#endif
}
const std::vector<std::string>& names() const { return counter_names_; }
size_t num_counters() const { return counter_names_.size(); }
private:
PerfCounters(const std::vector<std::string>& counter_names,
std::vector<int>&& counter_ids, std::vector<int>&& leader_ids)
: counter_ids_(std::move(counter_ids)),
leader_ids_(std::move(leader_ids)),
counter_names_(counter_names) {}
void CloseCounters() const;
std::vector<int> counter_ids_;
std::vector<int> leader_ids_;
std::vector<std::string> counter_names_;
};
// Typical usage of the above primitives.
class BENCHMARK_EXPORT PerfCountersMeasurement final {
public:
PerfCountersMeasurement(const std::vector<std::string>& counter_names);
size_t num_counters() const { return counters_.num_counters(); }
std::vector<std::string> names() const { return counters_.names(); }
BENCHMARK_ALWAYS_INLINE bool Start() {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&start_values_);
ClobberMemory();
return valid_read_;
}
BENCHMARK_ALWAYS_INLINE bool Stop(
std::vector<std::pair<std::string, double>>& measurements) {
if (num_counters() == 0) return true;
// Tell the compiler to not move instructions above/below where we take
// the snapshot.
ClobberMemory();
valid_read_ &= counters_.Snapshot(&end_values_);
ClobberMemory();
for (size_t i = 0; i < counters_.names().size(); ++i) {
double measurement = static_cast<double>(end_values_[i]) -
static_cast<double>(start_values_[i]);
measurements.push_back({counters_.names()[i], measurement});
}
return valid_read_;
}
private:
PerfCounters counters_;
bool valid_read_ = true;
PerfCounterValues start_values_;
PerfCounterValues end_values_;
};
} // namespace internal
} // namespace benchmark
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif // BENCHMARK_PERF_COUNTERS_H

View File

@@ -33,7 +33,7 @@
// Prefer C regex libraries when compiling w/o exceptions so that we can
// correctly report errors.
#if defined(BENCHMARK_HAS_NO_EXCEPTIONS) && \
defined(BENCHMARK_HAVE_STD_REGEX) && \
defined(HAVE_STD_REGEX) && \
(defined(HAVE_GNU_POSIX_REGEX) || defined(HAVE_POSIX_REGEX))
#undef HAVE_STD_REGEX
#endif
@@ -121,15 +121,13 @@ inline bool Regex::Init(const std::string& spec, std::string* error) {
if (ec != 0) {
if (error) {
size_t needed = regerror(ec, &re_, nullptr, 0);
char* errbuf = new char[needed];
regerror(ec, &re_, errbuf, needed);
std::vector<char> errbuf(needed);
regerror(ec, &re_, errbuf.data(), needed);
// regerror returns the number of bytes necessary to null terminate
// the string, so we move that when assigning to error.
CHECK_NE(needed, 0);
error->assign(errbuf, needed - 1);
delete[] errbuf;
BM_CHECK_NE(needed, 0);
error->assign(errbuf.data(), needed - 1);
}
return false;

View File

@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "benchmark/benchmark.h"
#include "timers.h"
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>
#include "benchmark/benchmark.h"
#include "check.h"
#include "string_util.h"
#include "timers.h"
namespace benchmark {
@@ -33,25 +33,32 @@ BenchmarkReporter::~BenchmarkReporter() {}
void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Context const &context) {
CHECK(out) << "cannot be null";
BM_CHECK(out) << "cannot be null";
auto &Out = *out;
#ifndef BENCHMARK_OS_QURT
// Date/time information is not available on QuRT.
// Attempting to get it via this call cause the binary to crash.
Out << LocalDateTimeString() << "\n";
#endif
if (context.executable_name)
Out << "Running " << context.executable_name << "\n";
if (benchmark::BenchmarkReporter::Context::executable_name != nullptr) {
Out << "Running " << benchmark::BenchmarkReporter::Context::executable_name
<< "\n";
}
const CPUInfo &info = context.cpu_info;
Out << "Run on (" << info.num_cpus << " X "
<< (info.cycles_per_second / 1000000.0) << " MHz CPU "
<< ((info.num_cpus > 1) ? "s" : "") << ")\n";
if (info.caches.size() != 0) {
if (!info.caches.empty()) {
Out << "CPU Caches:\n";
for (auto &CInfo : info.caches) {
for (const auto &CInfo : info.caches) {
Out << " L" << CInfo.level << " " << CInfo.type << " "
<< (CInfo.size / 1000) << "K";
if (CInfo.num_sharing != 0)
<< (CInfo.size / 1024) << " KiB";
if (CInfo.num_sharing != 0) {
Out << " (x" << (info.num_cpus / CInfo.num_sharing) << ")";
}
Out << "\n";
}
}
@@ -59,17 +66,34 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
Out << "Load Average: ";
for (auto It = info.load_avg.begin(); It != info.load_avg.end();) {
Out << StrFormat("%.2f", *It++);
if (It != info.load_avg.end()) Out << ", ";
if (It != info.load_avg.end()) {
Out << ", ";
}
}
Out << "\n";
}
if (info.scaling_enabled) {
std::map<std::string, std::string> *global_context =
internal::GetGlobalContext();
if (global_context != nullptr) {
for (const auto &kv : *global_context) {
Out << kv.first << ": " << kv.second << "\n";
}
}
if (CPUInfo::Scaling::ENABLED == info.scaling) {
Out << "***WARNING*** CPU scaling is enabled, the benchmark "
"real time measurements may be noisy and will incur extra "
"overhead.\n";
}
const SystemInfo &sysinfo = context.sys_info;
if (SystemInfo::ASLR::ENABLED == sysinfo.ASLRStatus) {
Out << "***WARNING*** ASLR is enabled, the results may have unreproducible "
"noise in them.\n";
}
#ifndef NDEBUG
Out << "***WARNING*** Library was built as DEBUG. Timings may be "
"affected.\n";
@@ -79,10 +103,11 @@ void BenchmarkReporter::PrintBasicContext(std::ostream *out,
// No initializer because it's already initialized to NULL.
const char *BenchmarkReporter::Context::executable_name;
BenchmarkReporter::Context::Context() : cpu_info(CPUInfo::Get()) {}
BenchmarkReporter::Context::Context()
: cpu_info(CPUInfo::Get()), sys_info(SystemInfo::Get()) {}
std::string BenchmarkReporter::Run::benchmark_name() const {
std::string name = run_name;
std::string name = run_name.str();
if (run_type == RT_Aggregate) {
name += "_" + aggregate_name;
}
@@ -91,13 +116,17 @@ std::string BenchmarkReporter::Run::benchmark_name() const {
double BenchmarkReporter::Run::GetAdjustedRealTime() const {
double new_time = real_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}
double BenchmarkReporter::Run::GetAdjustedCPUTime() const {
double new_time = cpu_accumulated_time * GetTimeUnitMultiplier(time_unit);
if (iterations != 0) new_time /= static_cast<double>(iterations);
if (iterations != 0) {
new_time /= static_cast<double>(iterations);
}
return new_time;
}

Some files were not shown because too many files have changed in this diff Show More