Adds multithreading.
This commit is contained in:
parent
cfb66589f7
commit
a4736467f2
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
@ -83,6 +83,7 @@
|
||||
"stack": "cpp",
|
||||
"cfenv": "cpp",
|
||||
"typeindex": "cpp",
|
||||
"valarray": "cpp"
|
||||
"valarray": "cpp",
|
||||
"csignal": "cpp"
|
||||
}
|
||||
}
|
||||
@ -6,7 +6,8 @@ find_package(Threads REQUIRED)
|
||||
|
||||
add_executable(Sorter framework/runner.cpp
|
||||
src/container.cpp src/container.hpp
|
||||
src/sorter.cpp src/sorter.hpp)
|
||||
src/sorter.cpp src/sorter.hpp
|
||||
src/thread_pool.cpp src/thread_pool.hpp)
|
||||
target_link_libraries(Sorter PUBLIC Threads::Threads)
|
||||
|
||||
target_compile_features(Sorter PRIVATE cxx_std_20)
|
||||
@ -50,6 +51,8 @@ add_custom_command(
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/container.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/sorter.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/sorter.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/thread_pool.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/thread_pool.hpp
|
||||
${plot_file}
|
||||
${CMAKE_CURRENT_LIST_DIR}/description.md
|
||||
COMMENT "Creating submission"
|
||||
@ -71,6 +74,8 @@ set(dist_file_list
|
||||
src/container.hpp
|
||||
src/sorter.cpp
|
||||
src/sorter.hpp
|
||||
src/thread_pool.cpp
|
||||
src/thread_pool.hpp
|
||||
)
|
||||
|
||||
set(framework_dist_file "ae-sorting.zip")
|
||||
|
||||
2
eval.py
2
eval.py
@ -7,7 +7,7 @@ from pathlib import Path
|
||||
|
||||
def run_experiment(output_file, build_dir):
|
||||
# The thread count is varied so the parallel sorter can be measured with different numbers of worker threads.
|
||||
for threads in [1]:
|
||||
for threads in [1, 2, 4, 8, 12, 16]:
|
||||
# for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e6 - 1, 1e7]:
|
||||
for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e7]:
|
||||
print("Measuring p=" + str(threads) + " n=" + str(size))
|
||||
|
||||
@ -62,7 +62,7 @@ void runExperiment(std::string_view name,
|
||||
std::chrono::steady_clock::time_point ctor = std::chrono::steady_clock::now();
|
||||
auto to_sort = container_factory(input);
|
||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||
sort_func(to_sort);
|
||||
sort_func(to_sort, num_threads);
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
totalNanoseconds +=
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin)
|
||||
@ -95,8 +95,8 @@ int main(int argc, char **argv) {
|
||||
[](const auto& data) {
|
||||
return ae::container(data);
|
||||
},
|
||||
[](ae::container& data) {
|
||||
ae::sorter{}.sort(data);
|
||||
[](ae::container& data, auto num_threads) {
|
||||
ae::sorter(num_threads).sort(data);
|
||||
}, argc, argv);
|
||||
|
||||
return 0;
|
||||
|
||||
35
result.txt
35
result.txt
@ -1,5 +1,30 @@
|
||||
RESULT name=sort n=100 t=1 iterations=301 durationNanoseconds=3324 totalDurationNanoseconds=1000561 constructorNanoseconds=274 totalConstructorNanoseconds=82660
|
||||
RESULT name=sort n=1000 t=1 iterations=13 durationNanoseconds=77557 totalDurationNanoseconds=1008241 constructorNanoseconds=2057 totalConstructorNanoseconds=26750
|
||||
RESULT name=sort n=10001 t=1 iterations=1 durationNanoseconds=1509911 totalDurationNanoseconds=1509911 constructorNanoseconds=108831 totalConstructorNanoseconds=108831
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8070546 totalDurationNanoseconds=8070546 constructorNanoseconds=488620 totalConstructorNanoseconds=488620
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=723407878 totalDurationNanoseconds=723407878 constructorNanoseconds=51148616 totalConstructorNanoseconds=51148616
|
||||
RESULT name=sort n=100 t=1 iterations=18 durationNanoseconds=57856 totalDurationNanoseconds=1041410 constructorNanoseconds=513 totalConstructorNanoseconds=9250
|
||||
RESULT name=sort n=1000 t=1 iterations=8 durationNanoseconds=126883 totalDurationNanoseconds=1015070 constructorNanoseconds=1475 totalConstructorNanoseconds=11800
|
||||
RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=615565 totalDurationNanoseconds=1231131 constructorNanoseconds=30890 totalConstructorNanoseconds=61780
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=9488107 totalDurationNanoseconds=9488107 constructorNanoseconds=524961 totalConstructorNanoseconds=524961
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=1416091993 totalDurationNanoseconds=1416091993 constructorNanoseconds=50440746 totalConstructorNanoseconds=50440746
|
||||
RESULT name=sort n=100 t=2 iterations=16 durationNanoseconds=63258 totalDurationNanoseconds=1012141 constructorNanoseconds=375 totalConstructorNanoseconds=6000
|
||||
RESULT name=sort n=1000 t=2 iterations=7 durationNanoseconds=154110 totalDurationNanoseconds=1078770 constructorNanoseconds=2008 totalConstructorNanoseconds=14060
|
||||
RESULT name=sort n=10001 t=2 iterations=3 durationNanoseconds=451387 totalDurationNanoseconds=1354161 constructorNanoseconds=19620 totalConstructorNanoseconds=58860
|
||||
RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=6236655 totalDurationNanoseconds=6236655 constructorNanoseconds=514650 totalConstructorNanoseconds=514650
|
||||
RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1380325518 totalDurationNanoseconds=1380325518 constructorNanoseconds=50373886 totalConstructorNanoseconds=50373886
|
||||
RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118743 totalDurationNanoseconds=1068691 constructorNanoseconds=436 totalConstructorNanoseconds=3930
|
||||
RESULT name=sort n=1000 t=4 iterations=4 durationNanoseconds=272115 totalDurationNanoseconds=1088461 constructorNanoseconds=2415 totalConstructorNanoseconds=9660
|
||||
RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=569255 totalDurationNanoseconds=1138510 constructorNanoseconds=29920 totalConstructorNanoseconds=59840
|
||||
RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6598125 totalDurationNanoseconds=6598125 constructorNanoseconds=507180 totalConstructorNanoseconds=507180
|
||||
RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1300242690 totalDurationNanoseconds=1300242690 constructorNanoseconds=50475097 totalConstructorNanoseconds=50475097
|
||||
RESULT name=sort n=100 t=8 iterations=3 durationNanoseconds=347863 totalDurationNanoseconds=1043591 constructorNanoseconds=2706 totalConstructorNanoseconds=8120
|
||||
RESULT name=sort n=1000 t=8 iterations=2 durationNanoseconds=610620 totalDurationNanoseconds=1221241 constructorNanoseconds=10400 totalConstructorNanoseconds=20800
|
||||
RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=706495 totalDurationNanoseconds=1412991 constructorNanoseconds=29600 totalConstructorNanoseconds=59200
|
||||
RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=7387085 totalDurationNanoseconds=7387085 constructorNanoseconds=557391 totalConstructorNanoseconds=557391
|
||||
RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1261560682 totalDurationNanoseconds=1261560682 constructorNanoseconds=49470756 totalConstructorNanoseconds=49470756
|
||||
RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=432037 totalDurationNanoseconds=1296111 constructorNanoseconds=1170 totalConstructorNanoseconds=3510
|
||||
RESULT name=sort n=1000 t=12 iterations=1 durationNanoseconds=1092461 totalDurationNanoseconds=1092461 constructorNanoseconds=12880 totalConstructorNanoseconds=12880
|
||||
RESULT name=sort n=10001 t=12 iterations=1 durationNanoseconds=1019941 totalDurationNanoseconds=1019941 constructorNanoseconds=54540 totalConstructorNanoseconds=54540
|
||||
RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=7159465 totalDurationNanoseconds=7159465 constructorNanoseconds=536730 totalConstructorNanoseconds=536730
|
||||
RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1503813105 totalDurationNanoseconds=1503813105 constructorNanoseconds=50150056 totalConstructorNanoseconds=50150056
|
||||
RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=432706 totalDurationNanoseconds=1298120 constructorNanoseconds=3833 totalConstructorNanoseconds=11500
|
||||
RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=784875 totalDurationNanoseconds=1569751 constructorNanoseconds=5285 totalConstructorNanoseconds=10570
|
||||
RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1953311 totalDurationNanoseconds=1953311 constructorNanoseconds=59420 totalConstructorNanoseconds=59420
|
||||
RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6820104 totalDurationNanoseconds=6820104 constructorNanoseconds=524961 totalConstructorNanoseconds=524961
|
||||
RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1352546567 totalDurationNanoseconds=1352546567 constructorNanoseconds=52707158 totalConstructorNanoseconds=52707158
|
||||
|
||||
@ -11,6 +11,11 @@
|
||||
|
||||
namespace ae {
|
||||
|
||||
/// Builds a sorter that owns a worker pool for the parallel radix passes.
///
/// @param num Number of worker threads passed to the ThreadPool constructor;
///            also stored in `num_threads`.
///
/// The default argument that used to sit on this out-of-class definition was
/// removed: the in-class declaration has none, and adding one here would turn
/// this constructor into a default constructor on redeclaration, which is
/// ill-formed ([dcl.fct.default]) and rejected by Clang.
sorter::sorter(uint32_t num)
    : num_threads(num),
      // NOTE(review): no matching delete for the pool is visible in this view —
      // confirm where ownership ends.
      pool(new ThreadPool(num)) {
}
|
||||
|
||||
void sorter::sort(container& data) {
|
||||
for (auto i = 1uz; i < data.placeholder_.size(); ++i) {
|
||||
std::ranges::copy(data.placeholder_[i], std::back_inserter(data.placeholder_[0]));
|
||||
@ -18,7 +23,7 @@ void sorter::sort(container& data) {
|
||||
}
|
||||
#if DEBUG
|
||||
for (int i = 0; i < data.placeholder_[0].size(); i++) {
|
||||
// if (copy[i] != data.placeholder_[0][i])
|
||||
if (copy[i] != data.placeholder_[0][i])
|
||||
std::cerr << i << " before:" << data.placeholder_[0][i] << std::endl;
|
||||
}
|
||||
|
||||
@ -27,6 +32,7 @@ void sorter::sort(container& data) {
|
||||
std::sort(copy.begin(), copy.end());
|
||||
#endif
|
||||
sorter::msd_inplace_radix_sort(data.placeholder_[0], 0, [&](auto span) {sorter::robin_hood_sort(span);});
|
||||
while (sorter::pool->size() > 0 || sorter::pool->isWorking()) {};
|
||||
#if DEBUG
|
||||
for (int i = 0; i < copy.size(); i++) {
|
||||
if (copy[i] != data.placeholder_[0][i])
|
||||
@ -169,9 +175,27 @@ void sorter::msd_inplace_radix_sort(
|
||||
|
||||
// sort each bucket recursively
|
||||
for (auto i = 0; i < sorter::RADIX_BUCKETS; i++) {
|
||||
if (sorter::pool != nullptr) {
|
||||
#if DEBUG
|
||||
std::cerr << "Putting in task with depth " << passes << " of bucket " << i << std::endl;
|
||||
#endif
|
||||
auto start = buckets_start[i];
|
||||
auto end = buckets_end[i];
|
||||
sorter::pool->add([start, end, &bucket_sort, passes, this, i](){
|
||||
#if DEBUG
|
||||
std::cerr << "Starting task with depth " << passes << " of bucket " << i << std::endl;
|
||||
#endif
|
||||
sorter::msd_inplace_radix_sort(std::span<container::element_type> (start, end), passes + 1, bucket_sort);
|
||||
#if DEBUG
|
||||
std::cerr << "Finishing task with depth " << passes << " of bucket " << i << std::endl;
|
||||
#endif
|
||||
});
|
||||
} else {
|
||||
sorter::msd_inplace_radix_sort(std::span<container::element_type> (buckets_start[i], buckets_end[i]), passes + 1, bucket_sort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sorter::robin_hood_sort(std::span<container::element_type> bucket) {
|
||||
const auto size = bucket.size() + sorter::OVERHEAD_SIZE;
|
||||
const auto mask = ((1L) << (sizeof(container::element_type) * CHAR_BIT - sorter::RADIX_ITERATIONS)) - 1;
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include "container.hpp"
|
||||
#include "thread_pool.hpp"
|
||||
#include "functional"
|
||||
#include "math.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace ae {
|
||||
|
||||
@ -11,12 +13,21 @@ class sorter {
|
||||
void sort(container& data);
|
||||
|
||||
// TODO You may add additional functions or data members to the sorter.
|
||||
|
||||
sorter(uint32_t num_threads);
|
||||
|
||||
void msd_inplace_radix_sort(
|
||||
std::span<container::element_type> range,
|
||||
size_t passes,
|
||||
const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
|
||||
);
|
||||
|
||||
void parallel_msd_inplace_radix_sort(
|
||||
std::span<container::element_type> range,
|
||||
size_t passes,
|
||||
const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
|
||||
);
|
||||
|
||||
void msd_inplace_radix_sort_binary(
|
||||
std::span<container::element_type> range,
|
||||
size_t passes,
|
||||
@ -24,10 +35,12 @@ class sorter {
|
||||
);
|
||||
|
||||
const uint32_t OVERHEAD_SIZE = 100L;
|
||||
const uint32_t SMALL_SORT_THRESHHOLD = 32;
|
||||
const uint32_t SMALL_SORT_THRESHHOLD = 100;
|
||||
const uint32_t RADIX_SIZE = 4;
|
||||
const uint32_t RADIX_BUCKETS = std::pow(2, 4);
|
||||
const uint32_t RADIX_ITERATIONS = 8;
|
||||
uint32_t num_threads;
|
||||
ThreadPool* pool = nullptr;
|
||||
|
||||
void robin_hood_sort(std::span<container::element_type> range);
|
||||
};
|
||||
|
||||
58
src/thread_pool.cpp
Normal file
58
src/thread_pool.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
#include "thread_pool.hpp"
|
||||
|
||||
/// Starts `num_threads` worker threads that pull tasks from the shared queue.
///
/// Every worker sleeps on the condition variable until a task is queued or the
/// pool is stopping. Once `stop` is set a worker returns immediately, without
/// running tasks that may still be queued.
///
/// @param num_threads Number of worker threads (and busy flags) to create.
ThreadPool::ThreadPool(size_t num_threads) {
    // Value-initialise every busy flag to false. `new bool[n]()` is accepted by
    // every language revision, unlike the parenthesised `({ false })` form.
    states = new bool[num_threads]();
    threads.reserve(num_threads);

    for (size_t i = 0; i < num_threads; ++i) {
        threads.emplace_back([this, i] {
            std::unique_lock<std::mutex> lock(mutex);

            while (true) {
                cv.wait(lock, [this] { return !tasks.empty() || stop; });

                // The wait only returns with an empty queue when the pool is
                // stopping, so testing `stop` covers both exit conditions.
                if (stop) {
                    return;
                }

                states[i] = true;
                std::function<void()> task = std::move(tasks.front());
                tasks.pop();

                // Run the task without the lock so other workers can dequeue
                // and tasks can enqueue follow-up work.
                lock.unlock();
                task();
                lock.lock();

                // Clear the flag while holding the mutex, the same one under
                // which it was set.
                states[i] = false;
            }
        });
    }
}
|
||||
|
||||
/// Stops the pool: raises the stop flag, wakes every worker, joins them and
/// releases the busy-flag array.
///
/// Workers return as soon as they observe `stop`, so tasks still sitting in
/// the queue at this point are not executed.
ThreadPool::~ThreadPool() {
    {
        // Set the flag under the mutex so a worker that is about to wait
        // cannot miss it.
        const std::lock_guard<std::mutex> guard(mutex);
        stop = true;
    }

    cv.notify_all();
    for (auto& worker : threads) {
        if (worker.joinable()) {
            worker.join();
        }
    }

    // The constructor allocates this array with new[]; it was never freed.
    // No worker can touch it any more once all threads are joined.
    delete[] states;
}
|
||||
|
||||
void ThreadPool::add(std::function<void()> task) {
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
tasks.emplace(std::move(task));
|
||||
cv.notify_one();
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
uint32_t ThreadPool::size() { return tasks.size(); }
|
||||
bool ThreadPool::isWorking() {
|
||||
for (auto i = 0; i < threads.size(); i++) {
|
||||
if (states[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
29
src/thread_pool.hpp
Normal file
29
src/thread_pool.hpp
Normal file
@ -0,0 +1,29 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
|
||||
|
||||
// This class was inspired by https://www.geeksforgeeks.org/cpp/thread-pool-in-cpp/ (accessed 26/09/2025)
// and is used to handle threads more efficiently.
|
||||
|
||||
/// Fixed-size pool of worker threads fed from a FIFO queue of
/// `std::function<void()>` tasks.
///
/// The pool owns its threads and a raw array of per-worker busy flags, so it
/// is neither copyable nor movable.
class ThreadPool {
private:
    std::vector<std::thread> threads;         ///< Worker threads.
    std::queue<std::function<void()>> tasks;  ///< Pending tasks, oldest first.
    std::mutex mutex;                         ///< Guards `tasks` and `stop`.
    std::condition_variable cv;               ///< Signalled on new tasks and on shutdown.
    bool stop = false;                        ///< Set by the destructor to end the workers.
    bool* states = nullptr;                   ///< One busy flag per worker, allocated by the constructor.

public:
    /// Starts `num_threads` workers.
    ThreadPool(size_t num_threads);

    /// Signals the workers to stop and joins them.
    ~ThreadPool();

    // Copying or moving would duplicate ownership of the threads and of the
    // `states` array. These operations were already unavailable because of the
    // mutex member; deleting them states that explicitly.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;
    ThreadPool(ThreadPool&&) = delete;
    ThreadPool& operator=(ThreadPool&&) = delete;

    /// Queues `task` and wakes one worker.
    void add(std::function<void()> task);

    /// Number of queued tasks that no worker has picked up yet.
    uint32_t size();

    /// True while at least one worker has its busy flag set.
    bool isWorking();
};
|
||||
Loading…
x
Reference in New Issue
Block a user