improve JobSystem::parallel_for + minor optimizations
- parallel_for no longer uses recursion to create the "leaf" jobs; this is now done linearly on N threads (one thread per CPU). This uses less stack space and reduces mispredicted branches.
- Remove almost all SYSTRACE calls because they have a huge impact on things like parallel_for() and are misleading. They can be enabled again by setting HEAVY_SYSTRACE to true.
This commit is contained in:
committed by
Mathias Agopian
parent
2df639133b
commit
8170ca7cd1
@@ -51,7 +51,7 @@ static void BM_JobSystemAsChildren4k(benchmark::State& state) {
|
||||
for (auto _ : state) {
|
||||
auto root = js.create(nullptr, &emptyJob);
|
||||
for (size_t i = 0; i < 4095; i++) {
|
||||
js.run(js.create(root, &emptyJob));
|
||||
js.run(js.create(root, &emptyJob), JobSystem::DONT_SIGNAL);
|
||||
}
|
||||
js.runAndWait(root);
|
||||
}
|
||||
@@ -68,8 +68,8 @@ static void BM_JobSystemParallelFor(benchmark::State& state) {
|
||||
{
|
||||
PerformanceCounters pc(state);
|
||||
for (auto _ : state) {
|
||||
auto job = jobs::parallel_for(js, nullptr, 0, 4096, [](uint32_t start, uint32_t count) {
|
||||
}, jobs::CountSplitter<1>());
|
||||
auto job = jobs::parallel_for(js, nullptr, 0, 4096,
|
||||
[](uint32_t start, uint32_t count) { }, jobs::CountSplitter<1>());
|
||||
js.runAndWait(job);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user