improve JobSystem::parallel_for + minor optimizations

- parallel_for no longer uses recursion to create the "leaf"
jobs; this is now done linearly on N threads (one thread per CPU).
This uses less stack space and reduces mispredicted branches.

- remove almost all SYSTRACE calls because they have a huge impact
on things like parallel_for() and are misleading. They can be
enabled again by setting HEAVY_SYSTRACE to true.
This commit is contained in:
Pixelflinger
2019-07-03 15:28:52 -07:00
committed by Mathias Agopian
parent 2df639133b
commit 8170ca7cd1
3 changed files with 66 additions and 56 deletions

View File

@@ -51,7 +51,7 @@ static void BM_JobSystemAsChildren4k(benchmark::State& state) {
for (auto _ : state) {
auto root = js.create(nullptr, &emptyJob);
for (size_t i = 0; i < 4095; i++) {
js.run(js.create(root, &emptyJob));
js.run(js.create(root, &emptyJob), JobSystem::DONT_SIGNAL);
}
js.runAndWait(root);
}
@@ -68,8 +68,8 @@ static void BM_JobSystemParallelFor(benchmark::State& state) {
{
PerformanceCounters pc(state);
for (auto _ : state) {
auto job = jobs::parallel_for(js, nullptr, 0, 4096, [](uint32_t start, uint32_t count) {
}, jobs::CountSplitter<1>());
auto job = jobs::parallel_for(js, nullptr, 0, 4096,
[](uint32_t start, uint32_t count) { }, jobs::CountSplitter<1>());
js.runAndWait(job);
}
}