Compare commits
5 Commits
15bc9836fb
...
ad86784645
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ad86784645 | ||
|
|
c8c0f80286 | ||
|
|
a4736467f2 | ||
|
|
cfb66589f7 | ||
|
|
94a092c17e |
14
.vscode/settings.json
vendored
14
.vscode/settings.json
vendored
@ -72,6 +72,18 @@
|
||||
"cinttypes": "cpp",
|
||||
"typeinfo": "cpp",
|
||||
"variant": "cpp",
|
||||
"bitset": "cpp"
|
||||
"bitset": "cpp",
|
||||
"hash_map": "cpp",
|
||||
"complex": "cpp",
|
||||
"coroutine": "cpp",
|
||||
"forward_list": "cpp",
|
||||
"hash_set": "cpp",
|
||||
"future": "cpp",
|
||||
"shared_mutex": "cpp",
|
||||
"stack": "cpp",
|
||||
"cfenv": "cpp",
|
||||
"typeindex": "cpp",
|
||||
"valarray": "cpp",
|
||||
"csignal": "cpp"
|
||||
}
|
||||
}
|
||||
@ -6,7 +6,11 @@ find_package(Threads REQUIRED)
|
||||
|
||||
add_executable(Sorter framework/runner.cpp
|
||||
src/container.cpp src/container.hpp
|
||||
src/sorter.cpp src/sorter.hpp)
|
||||
src/sorter.cpp src/sorter.hpp
|
||||
src/thread_pool.cpp src/thread_pool.hpp
|
||||
src/single_task_handler.cpp src/single_task_handler.hpp
|
||||
src/task_handler.hpp
|
||||
)
|
||||
target_link_libraries(Sorter PUBLIC Threads::Threads)
|
||||
|
||||
target_compile_features(Sorter PRIVATE cxx_std_20)
|
||||
@ -50,6 +54,11 @@ add_custom_command(
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/container.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/sorter.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/sorter.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/thread_pool.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/thread_pool.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/single_task_handler.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/single_task_handler.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/task_handler.hpp
|
||||
${plot_file}
|
||||
${CMAKE_CURRENT_LIST_DIR}/description.md
|
||||
COMMENT "Creating submission"
|
||||
@ -71,6 +80,11 @@ set(dist_file_list
|
||||
src/container.hpp
|
||||
src/sorter.cpp
|
||||
src/sorter.hpp
|
||||
src/thread_pool.cpp
|
||||
src/thread_pool.hpp
|
||||
src/single_task_handler.cpp
|
||||
src/single_task_handler.hpp
|
||||
src/task_handler.hpp
|
||||
)
|
||||
|
||||
set(framework_dist_file "ae-sorting.zip")
|
||||
|
||||
8
eval.py
8
eval.py
@ -7,9 +7,9 @@ from pathlib import Path
|
||||
|
||||
def run_experiment(output_file, build_dir):
|
||||
# Thread counts to benchmark; each value is passed to the Sorter executable as its second argument.
|
||||
for threads in [1]:
|
||||
for threads in [1, 2, 4, 8, 12, 16]:
|
||||
# for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e6 - 1, 1e7]:
|
||||
for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e7]:
|
||||
for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e7, 1e8]:
|
||||
print("Measuring p=" + str(threads) + " n=" + str(size))
|
||||
executable = Path(build_dir) / "Sorter"
|
||||
returncode = subprocess.call([executable, str(size), str(threads)], stdout=output_file)
|
||||
@ -50,11 +50,11 @@ def make_plot(result_file):
|
||||
axs[i].set_title(f"#p={t}")
|
||||
for name in plots[t]:
|
||||
axs[i].plot(*zip(*plots[t][name]), label=name, marker='x')
|
||||
axs[i].plot(*zip(*plots[t][name + " (constructor)"]), label=name + " (constructor)", marker='+')
|
||||
axs[i].plot(*zip(*plots[t][name]), label=name + " (constructor)", marker='+')
|
||||
axs[i].set_xscale('log')
|
||||
else:
|
||||
axs.plot(*zip(*plots[t][name]), label=name, marker='x')
|
||||
axs.plot(*zip(*plots[t][name + " (constructor)"]), label=name + " (constructor)", marker='+')
|
||||
axs.plot(*zip(*plots[t][name]), label=name + " (constructor)", marker='+')
|
||||
axs.set_xscale('log')
|
||||
|
||||
if len(plots) > 1:
|
||||
|
||||
@ -62,7 +62,7 @@ void runExperiment(std::string_view name,
|
||||
std::chrono::steady_clock::time_point ctor = std::chrono::steady_clock::now();
|
||||
auto to_sort = container_factory(input);
|
||||
std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
|
||||
sort_func(to_sort);
|
||||
sort_func(to_sort, num_threads);
|
||||
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
|
||||
totalNanoseconds +=
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin)
|
||||
@ -95,8 +95,8 @@ int main(int argc, char **argv) {
|
||||
[](const auto& data) {
|
||||
return ae::container(data);
|
||||
},
|
||||
[](ae::container& data) {
|
||||
ae::sorter{}.sort(data);
|
||||
[](ae::container& data, auto num_threads) {
|
||||
ae::sorter(num_threads).sort(data);
|
||||
}, argc, argv);
|
||||
|
||||
return 0;
|
||||
|
||||
5
result.1.txt
Normal file
5
result.1.txt
Normal file
@ -0,0 +1,5 @@
|
||||
RESULT name=sort n=100 t=1 iterations=531 durationNanoseconds=1883 totalDurationNanoseconds=1000180 constructorNanoseconds=225 totalConstructorNanoseconds=119650
|
||||
RESULT name=sort n=1000 t=1 iterations=28 durationNanoseconds=36451 totalDurationNanoseconds=1020630 constructorNanoseconds=805 totalConstructorNanoseconds=22540
|
||||
RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=607460 totalDurationNanoseconds=1214920 constructorNanoseconds=29805 totalConstructorNanoseconds=59610
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=25963576 totalDurationNanoseconds=25963576 constructorNanoseconds=568500 totalConstructorNanoseconds=568500
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=238699610782 totalDurationNanoseconds=238699610782 constructorNanoseconds=51903183 totalConstructorNanoseconds=51903183
|
||||
36
result.b32.txt
Normal file
36
result.b32.txt
Normal file
@ -0,0 +1,36 @@
|
||||
RESULT name=sort n=100 t=1 iterations=129 durationNanoseconds=7772 totalDurationNanoseconds=1002700 constructorNanoseconds=197 totalConstructorNanoseconds=25540
|
||||
RESULT name=sort n=1000 t=1 iterations=21 durationNanoseconds=49038 totalDurationNanoseconds=1029800 constructorNanoseconds=2039 totalConstructorNanoseconds=42839
|
||||
RESULT name=sort n=10001 t=1 iterations=3 durationNanoseconds=462140 totalDurationNanoseconds=1386420 constructorNanoseconds=56730 totalConstructorNanoseconds=170190
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8417938 totalDurationNanoseconds=8417938 constructorNanoseconds=1179370 totalConstructorNanoseconds=1179370
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=874141313 totalDurationNanoseconds=874141313 constructorNanoseconds=124815857 totalConstructorNanoseconds=124815857
|
||||
RESULT name=sort n=100000000 t=1 iterations=1 durationNanoseconds=16451992962 totalDurationNanoseconds=16451992962 constructorNanoseconds=1368193059 totalConstructorNanoseconds=1368193059
|
||||
RESULT name=sort n=100 t=2 iterations=10 durationNanoseconds=104213 totalDurationNanoseconds=1042130 constructorNanoseconds=913 totalConstructorNanoseconds=9130
|
||||
RESULT name=sort n=1000 t=2 iterations=6 durationNanoseconds=169346 totalDurationNanoseconds=1016080 constructorNanoseconds=8115 totalConstructorNanoseconds=48690
|
||||
RESULT name=sort n=10001 t=2 iterations=2 durationNanoseconds=612194 totalDurationNanoseconds=1224389 constructorNanoseconds=73415 totalConstructorNanoseconds=146830
|
||||
RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=6726818 totalDurationNanoseconds=6726818 constructorNanoseconds=1205860 totalConstructorNanoseconds=1205860
|
||||
RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1233839684 totalDurationNanoseconds=1233839684 constructorNanoseconds=124416317 totalConstructorNanoseconds=124416317
|
||||
RESULT name=sort n=100000000 t=2 iterations=1 durationNanoseconds=12847945920 totalDurationNanoseconds=12847945920 constructorNanoseconds=1375436787 totalConstructorNanoseconds=1375436787
|
||||
RESULT name=sort n=100 t=4 iterations=8 durationNanoseconds=125433 totalDurationNanoseconds=1003470 constructorNanoseconds=362 totalConstructorNanoseconds=2900
|
||||
RESULT name=sort n=1000 t=4 iterations=5 durationNanoseconds=239229 totalDurationNanoseconds=1196149 constructorNanoseconds=7132 totalConstructorNanoseconds=35660
|
||||
RESULT name=sort n=10001 t=4 iterations=3 durationNanoseconds=508916 totalDurationNanoseconds=1526750 constructorNanoseconds=61393 totalConstructorNanoseconds=184180
|
||||
RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6239728 totalDurationNanoseconds=6239728 constructorNanoseconds=1209990 totalConstructorNanoseconds=1209990
|
||||
RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1181622265 totalDurationNanoseconds=1181622265 constructorNanoseconds=133041285 totalConstructorNanoseconds=133041285
|
||||
RESULT name=sort n=100000000 t=4 iterations=1 durationNanoseconds=8189890498 totalDurationNanoseconds=8189890498 constructorNanoseconds=1363428860 totalConstructorNanoseconds=1363428860
|
||||
RESULT name=sort n=100 t=8 iterations=3 durationNanoseconds=386456 totalDurationNanoseconds=1159370 constructorNanoseconds=860 totalConstructorNanoseconds=2580
|
||||
RESULT name=sort n=1000 t=8 iterations=2 durationNanoseconds=565645 totalDurationNanoseconds=1131290 constructorNanoseconds=15305 totalConstructorNanoseconds=30610
|
||||
RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=715080 totalDurationNanoseconds=1430160 constructorNanoseconds=72435 totalConstructorNanoseconds=144870
|
||||
RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=5976559 totalDurationNanoseconds=5976559 constructorNanoseconds=1178210 totalConstructorNanoseconds=1178210
|
||||
RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1166965728 totalDurationNanoseconds=1166965728 constructorNanoseconds=124620786 totalConstructorNanoseconds=124620786
|
||||
RESULT name=sort n=100000000 t=8 iterations=1 durationNanoseconds=7031099529 totalDurationNanoseconds=7031099529 constructorNanoseconds=1382599627 totalConstructorNanoseconds=1382599627
|
||||
RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=411886 totalDurationNanoseconds=1235660 constructorNanoseconds=756 totalConstructorNanoseconds=2270
|
||||
RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=874880 totalDurationNanoseconds=1749760 constructorNanoseconds=13010 totalConstructorNanoseconds=26020
|
||||
RESULT name=sort n=10001 t=12 iterations=2 durationNanoseconds=1048975 totalDurationNanoseconds=2097950 constructorNanoseconds=68755 totalConstructorNanoseconds=137510
|
||||
RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=5733409 totalDurationNanoseconds=5733409 constructorNanoseconds=1157440 totalConstructorNanoseconds=1157440
|
||||
RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1212925449 totalDurationNanoseconds=1212925449 constructorNanoseconds=128551595 totalConstructorNanoseconds=128551595
|
||||
RESULT name=sort n=100000000 t=12 iterations=1 durationNanoseconds=7789121434 totalDurationNanoseconds=7789121434 constructorNanoseconds=1402616603 totalConstructorNanoseconds=1402616603
|
||||
RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=429000 totalDurationNanoseconds=1287000 constructorNanoseconds=1326 totalConstructorNanoseconds=3980
|
||||
RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=908530 totalDurationNanoseconds=1817060 constructorNanoseconds=9385 totalConstructorNanoseconds=18770
|
||||
RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1256210 totalDurationNanoseconds=1256210 constructorNanoseconds=102130 totalConstructorNanoseconds=102130
|
||||
RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6508428 totalDurationNanoseconds=6508428 constructorNanoseconds=1205470 totalConstructorNanoseconds=1205470
|
||||
RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1348730723 totalDurationNanoseconds=1348730723 constructorNanoseconds=126286186 totalConstructorNanoseconds=126286186
|
||||
RESULT name=sort n=100000000 t=16 iterations=1 durationNanoseconds=7396974089 totalDurationNanoseconds=7396974089 constructorNanoseconds=1374412728 totalConstructorNanoseconds=1374412728
|
||||
41
result.txt
41
result.txt
@ -1,5 +1,36 @@
|
||||
RESULT name=sort n=100 t=1 iterations=545 durationNanoseconds=1835 totalDurationNanoseconds=1000511 constructorNanoseconds=232 totalConstructorNanoseconds=126950
|
||||
RESULT name=sort n=1000 t=1 iterations=29 durationNanoseconds=35382 totalDurationNanoseconds=1026091 constructorNanoseconds=773 totalConstructorNanoseconds=22440
|
||||
RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=592405 totalDurationNanoseconds=1184811 constructorNanoseconds=31980 totalConstructorNanoseconds=63960
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=24243178 totalDurationNanoseconds=24243178 constructorNanoseconds=562170 totalConstructorNanoseconds=562170
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=219559500741 totalDurationNanoseconds=219559500741 constructorNanoseconds=53379598 totalConstructorNanoseconds=53379598
|
||||
RESULT name=sort n=100 t=1 iterations=181 durationNanoseconds=5530 totalDurationNanoseconds=1001080 constructorNanoseconds=166 totalConstructorNanoseconds=30180
|
||||
RESULT name=sort n=1000 t=1 iterations=20 durationNanoseconds=50209 totalDurationNanoseconds=1004180 constructorNanoseconds=2275 totalConstructorNanoseconds=45500
|
||||
RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=551520 totalDurationNanoseconds=1103040 constructorNanoseconds=78160 totalConstructorNanoseconds=156320
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8726749 totalDurationNanoseconds=8726749 constructorNanoseconds=1262160 totalConstructorNanoseconds=1262160
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=868109014 totalDurationNanoseconds=868109014 constructorNanoseconds=129001816 totalConstructorNanoseconds=129001816
|
||||
RESULT name=sort n=100000000 t=1 iterations=1 durationNanoseconds=16587735066 totalDurationNanoseconds=16587735066 constructorNanoseconds=1384105216 totalConstructorNanoseconds=1384105216
|
||||
RESULT name=sort n=100 t=2 iterations=13 durationNanoseconds=77370 totalDurationNanoseconds=1005810 constructorNanoseconds=370 totalConstructorNanoseconds=4820
|
||||
RESULT name=sort n=1000 t=2 iterations=8 durationNanoseconds=127625 totalDurationNanoseconds=1021000 constructorNanoseconds=5151 totalConstructorNanoseconds=41210
|
||||
RESULT name=sort n=10001 t=2 iterations=2 durationNanoseconds=648160 totalDurationNanoseconds=1296320 constructorNanoseconds=66385 totalConstructorNanoseconds=132770
|
||||
RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=7803549 totalDurationNanoseconds=7803549 constructorNanoseconds=1158910 totalConstructorNanoseconds=1158910
|
||||
RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1418380159 totalDurationNanoseconds=1418380159 constructorNanoseconds=131211135 totalConstructorNanoseconds=131211135
|
||||
RESULT name=sort n=100000000 t=2 iterations=1 durationNanoseconds=12091467754 totalDurationNanoseconds=12091467754 constructorNanoseconds=1417232259 totalConstructorNanoseconds=1417232259
|
||||
RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118352 totalDurationNanoseconds=1065170 constructorNanoseconds=611 totalConstructorNanoseconds=5500
|
||||
RESULT name=sort n=1000 t=4 iterations=3 durationNanoseconds=359543 totalDurationNanoseconds=1078630 constructorNanoseconds=17990 totalConstructorNanoseconds=53970
|
||||
RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=814465 totalDurationNanoseconds=1628930 constructorNanoseconds=134305 totalConstructorNanoseconds=268610
|
||||
RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6957149 totalDurationNanoseconds=6957149 constructorNanoseconds=1242780 totalConstructorNanoseconds=1242780
|
||||
RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1117816157 totalDurationNanoseconds=1117816157 constructorNanoseconds=131642965 totalConstructorNanoseconds=131642965
|
||||
RESULT name=sort n=100000000 t=4 iterations=1 durationNanoseconds=8908347671 totalDurationNanoseconds=8908347671 constructorNanoseconds=1293569273 totalConstructorNanoseconds=1293569273
|
||||
RESULT name=sort n=100 t=8 iterations=4 durationNanoseconds=262700 totalDurationNanoseconds=1050800 constructorNanoseconds=660 totalConstructorNanoseconds=2640
|
||||
RESULT name=sort n=1000 t=8 iterations=1 durationNanoseconds=1083510 totalDurationNanoseconds=1083510 constructorNanoseconds=23680 totalConstructorNanoseconds=23680
|
||||
RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=774170 totalDurationNanoseconds=1548340 constructorNanoseconds=78415 totalConstructorNanoseconds=156830
|
||||
RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=5745139 totalDurationNanoseconds=5745139 constructorNanoseconds=1267620 totalConstructorNanoseconds=1267620
|
||||
RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1254361820 totalDurationNanoseconds=1254361820 constructorNanoseconds=132939165 totalConstructorNanoseconds=132939165
|
||||
RESULT name=sort n=100000000 t=8 iterations=1 durationNanoseconds=7138660668 totalDurationNanoseconds=7138660668 constructorNanoseconds=1298459682 totalConstructorNanoseconds=1298459682
|
||||
RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=345353 totalDurationNanoseconds=1036060 constructorNanoseconds=1346 totalConstructorNanoseconds=4040
|
||||
RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=587034 totalDurationNanoseconds=1174069 constructorNanoseconds=13240 totalConstructorNanoseconds=26480
|
||||
RESULT name=sort n=10001 t=12 iterations=2 durationNanoseconds=894959 totalDurationNanoseconds=1789919 constructorNanoseconds=73220 totalConstructorNanoseconds=146440
|
||||
RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=5794189 totalDurationNanoseconds=5794189 constructorNanoseconds=1217119 totalConstructorNanoseconds=1217119
|
||||
RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1233781405 totalDurationNanoseconds=1233781405 constructorNanoseconds=132758054 totalConstructorNanoseconds=132758054
|
||||
RESULT name=sort n=100000000 t=12 iterations=1 durationNanoseconds=7069741771 totalDurationNanoseconds=7069741771 constructorNanoseconds=1331710866 totalConstructorNanoseconds=1331710866
|
||||
RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=420606 totalDurationNanoseconds=1261819 constructorNanoseconds=1360 totalConstructorNanoseconds=4080
|
||||
RESULT name=sort n=1000 t=16 iterations=1 durationNanoseconds=1215220 totalDurationNanoseconds=1215220 constructorNanoseconds=20870 totalConstructorNanoseconds=20870
|
||||
RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1243860 totalDurationNanoseconds=1243860 constructorNanoseconds=113500 totalConstructorNanoseconds=113500
|
||||
RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6044489 totalDurationNanoseconds=6044489 constructorNanoseconds=1216550 totalConstructorNanoseconds=1216550
|
||||
RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1294393674 totalDurationNanoseconds=1294393674 constructorNanoseconds=132577214 totalConstructorNanoseconds=132577214
|
||||
RESULT name=sort n=100000000 t=16 iterations=1 durationNanoseconds=7428194523 totalDurationNanoseconds=7428194523 constructorNanoseconds=1311964740 totalConstructorNanoseconds=1311964740
|
||||
|
||||
30
result.withoutbitset.txt
Normal file
30
result.withoutbitset.txt
Normal file
@ -0,0 +1,30 @@
|
||||
RESULT name=sort n=100 t=1 iterations=95 durationNanoseconds=10561 totalDurationNanoseconds=1003361 constructorNanoseconds=460 totalConstructorNanoseconds=43740
|
||||
RESULT name=sort n=1000 t=1 iterations=22 durationNanoseconds=46203 totalDurationNanoseconds=1016480 constructorNanoseconds=764 totalConstructorNanoseconds=16820
|
||||
RESULT name=sort n=10001 t=1 iterations=3 durationNanoseconds=459633 totalDurationNanoseconds=1378901 constructorNanoseconds=18656 totalConstructorNanoseconds=55970
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8231415 totalDurationNanoseconds=8231415 constructorNanoseconds=514381 totalConstructorNanoseconds=514381
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=744634713 totalDurationNanoseconds=744634713 constructorNanoseconds=51303216 totalConstructorNanoseconds=51303216
|
||||
RESULT name=sort n=100 t=2 iterations=18 durationNanoseconds=58551 totalDurationNanoseconds=1053931 constructorNanoseconds=361 totalConstructorNanoseconds=6510
|
||||
RESULT name=sort n=1000 t=2 iterations=9 durationNanoseconds=120015 totalDurationNanoseconds=1080141 constructorNanoseconds=1345 totalConstructorNanoseconds=12110
|
||||
RESULT name=sort n=10001 t=2 iterations=3 durationNanoseconds=495777 totalDurationNanoseconds=1487331 constructorNanoseconds=19473 totalConstructorNanoseconds=58420
|
||||
RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=7827986 totalDurationNanoseconds=7827986 constructorNanoseconds=508510 totalConstructorNanoseconds=508510
|
||||
RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1296871738 totalDurationNanoseconds=1296871738 constructorNanoseconds=49085315 totalConstructorNanoseconds=49085315
|
||||
RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118596 totalDurationNanoseconds=1067371 constructorNanoseconds=455 totalConstructorNanoseconds=4100
|
||||
RESULT name=sort n=1000 t=4 iterations=5 durationNanoseconds=220166 totalDurationNanoseconds=1100831 constructorNanoseconds=2204 totalConstructorNanoseconds=11020
|
||||
RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=502925 totalDurationNanoseconds=1005851 constructorNanoseconds=29945 totalConstructorNanoseconds=59890
|
||||
RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=5620814 totalDurationNanoseconds=5620814 constructorNanoseconds=528570 totalConstructorNanoseconds=528570
|
||||
RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1198704968 totalDurationNanoseconds=1198704968 constructorNanoseconds=50132516 totalConstructorNanoseconds=50132516
|
||||
RESULT name=sort n=100 t=8 iterations=5 durationNanoseconds=222358 totalDurationNanoseconds=1111791 constructorNanoseconds=1116 totalConstructorNanoseconds=5580
|
||||
RESULT name=sort n=1000 t=8 iterations=3 durationNanoseconds=457883 totalDurationNanoseconds=1373650 constructorNanoseconds=3950 totalConstructorNanoseconds=11851
|
||||
RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=705680 totalDurationNanoseconds=1411361 constructorNanoseconds=30140 totalConstructorNanoseconds=60280
|
||||
RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=6234564 totalDurationNanoseconds=6234564 constructorNanoseconds=530141 totalConstructorNanoseconds=530141
|
||||
RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1219668902 totalDurationNanoseconds=1219668902 constructorNanoseconds=50095186 totalConstructorNanoseconds=50095186
|
||||
RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=336683 totalDurationNanoseconds=1010050 constructorNanoseconds=1553 totalConstructorNanoseconds=4660
|
||||
RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=709845 totalDurationNanoseconds=1419691 constructorNanoseconds=3685 totalConstructorNanoseconds=7370
|
||||
RESULT name=sort n=10001 t=12 iterations=1 durationNanoseconds=1008971 totalDurationNanoseconds=1008971 constructorNanoseconds=45810 totalConstructorNanoseconds=45810
|
||||
RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=4991544 totalDurationNanoseconds=4991544 constructorNanoseconds=494530 totalConstructorNanoseconds=494530
|
||||
RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1295272477 totalDurationNanoseconds=1295272477 constructorNanoseconds=50175095 totalConstructorNanoseconds=50175095
|
||||
RESULT name=sort n=100 t=16 iterations=2 durationNanoseconds=668925 totalDurationNanoseconds=1337851 constructorNanoseconds=3290 totalConstructorNanoseconds=6580
|
||||
RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=828375 totalDurationNanoseconds=1656751 constructorNanoseconds=5660 totalConstructorNanoseconds=11320
|
||||
RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1319491 totalDurationNanoseconds=1319491 constructorNanoseconds=65160 totalConstructorNanoseconds=65160
|
||||
RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=7145805 totalDurationNanoseconds=7145805 constructorNanoseconds=529550 totalConstructorNanoseconds=529550
|
||||
RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1483766932 totalDurationNanoseconds=1483766932 constructorNanoseconds=52444497 totalConstructorNanoseconds=52444497
|
||||
30
result.withouttaskhandler.txt
Normal file
30
result.withouttaskhandler.txt
Normal file
@ -0,0 +1,30 @@
|
||||
RESULT name=sort n=100 t=1 iterations=18 durationNanoseconds=57856 totalDurationNanoseconds=1041410 constructorNanoseconds=513 totalConstructorNanoseconds=9250
|
||||
RESULT name=sort n=1000 t=1 iterations=8 durationNanoseconds=126883 totalDurationNanoseconds=1015070 constructorNanoseconds=1475 totalConstructorNanoseconds=11800
|
||||
RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=615565 totalDurationNanoseconds=1231131 constructorNanoseconds=30890 totalConstructorNanoseconds=61780
|
||||
RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=9488107 totalDurationNanoseconds=9488107 constructorNanoseconds=524961 totalConstructorNanoseconds=524961
|
||||
RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=1416091993 totalDurationNanoseconds=1416091993 constructorNanoseconds=50440746 totalConstructorNanoseconds=50440746
|
||||
RESULT name=sort n=100 t=2 iterations=16 durationNanoseconds=63258 totalDurationNanoseconds=1012141 constructorNanoseconds=375 totalConstructorNanoseconds=6000
|
||||
RESULT name=sort n=1000 t=2 iterations=7 durationNanoseconds=154110 totalDurationNanoseconds=1078770 constructorNanoseconds=2008 totalConstructorNanoseconds=14060
|
||||
RESULT name=sort n=10001 t=2 iterations=3 durationNanoseconds=451387 totalDurationNanoseconds=1354161 constructorNanoseconds=19620 totalConstructorNanoseconds=58860
|
||||
RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=6236655 totalDurationNanoseconds=6236655 constructorNanoseconds=514650 totalConstructorNanoseconds=514650
|
||||
RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1380325518 totalDurationNanoseconds=1380325518 constructorNanoseconds=50373886 totalConstructorNanoseconds=50373886
|
||||
RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118743 totalDurationNanoseconds=1068691 constructorNanoseconds=436 totalConstructorNanoseconds=3930
|
||||
RESULT name=sort n=1000 t=4 iterations=4 durationNanoseconds=272115 totalDurationNanoseconds=1088461 constructorNanoseconds=2415 totalConstructorNanoseconds=9660
|
||||
RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=569255 totalDurationNanoseconds=1138510 constructorNanoseconds=29920 totalConstructorNanoseconds=59840
|
||||
RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6598125 totalDurationNanoseconds=6598125 constructorNanoseconds=507180 totalConstructorNanoseconds=507180
|
||||
RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1300242690 totalDurationNanoseconds=1300242690 constructorNanoseconds=50475097 totalConstructorNanoseconds=50475097
|
||||
RESULT name=sort n=100 t=8 iterations=3 durationNanoseconds=347863 totalDurationNanoseconds=1043591 constructorNanoseconds=2706 totalConstructorNanoseconds=8120
|
||||
RESULT name=sort n=1000 t=8 iterations=2 durationNanoseconds=610620 totalDurationNanoseconds=1221241 constructorNanoseconds=10400 totalConstructorNanoseconds=20800
|
||||
RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=706495 totalDurationNanoseconds=1412991 constructorNanoseconds=29600 totalConstructorNanoseconds=59200
|
||||
RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=7387085 totalDurationNanoseconds=7387085 constructorNanoseconds=557391 totalConstructorNanoseconds=557391
|
||||
RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1261560682 totalDurationNanoseconds=1261560682 constructorNanoseconds=49470756 totalConstructorNanoseconds=49470756
|
||||
RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=432037 totalDurationNanoseconds=1296111 constructorNanoseconds=1170 totalConstructorNanoseconds=3510
|
||||
RESULT name=sort n=1000 t=12 iterations=1 durationNanoseconds=1092461 totalDurationNanoseconds=1092461 constructorNanoseconds=12880 totalConstructorNanoseconds=12880
|
||||
RESULT name=sort n=10001 t=12 iterations=1 durationNanoseconds=1019941 totalDurationNanoseconds=1019941 constructorNanoseconds=54540 totalConstructorNanoseconds=54540
|
||||
RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=7159465 totalDurationNanoseconds=7159465 constructorNanoseconds=536730 totalConstructorNanoseconds=536730
|
||||
RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1503813105 totalDurationNanoseconds=1503813105 constructorNanoseconds=50150056 totalConstructorNanoseconds=50150056
|
||||
RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=432706 totalDurationNanoseconds=1298120 constructorNanoseconds=3833 totalConstructorNanoseconds=11500
|
||||
RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=784875 totalDurationNanoseconds=1569751 constructorNanoseconds=5285 totalConstructorNanoseconds=10570
|
||||
RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1953311 totalDurationNanoseconds=1953311 constructorNanoseconds=59420 totalConstructorNanoseconds=59420
|
||||
RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6820104 totalDurationNanoseconds=6820104 constructorNanoseconds=524961 totalConstructorNanoseconds=524961
|
||||
RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1352546567 totalDurationNanoseconds=1352546567 constructorNanoseconds=52707158 totalConstructorNanoseconds=52707158
|
||||
@ -2,22 +2,28 @@
|
||||
|
||||
#include <cstddef>
|
||||
#include <span>
|
||||
#include <iostream>
|
||||
|
||||
namespace ae {
|
||||
|
||||
// Builds the array directory from `data`: the input is cut into consecutive
// runs of `chunk_size` elements and each run is copied into its own chunk.
//
// The directory is sized up front to data.size() / chunk_size + 1 chunks, so
// there is always room for a partially filled final chunk (and one spare chunk
// when the input length is an exact multiple of chunk_size).
container::container(std::span<const element_type> data) {
    // `this->` is required: the parameter `data` shadows the member of the same name.
    // The chunks are value-initialised, so the unused tail of the last chunk is
    // zero instead of holding indeterminate values.
    this->data = Directory(data.size() / container::chunk_size + 1);

    std::size_t entry = 0;
    for (auto first = data.begin(); first < data.end();) {
        // Take a full chunk, or whatever is left of the input if that is less.
        const auto last = (data.end() - first) < container::chunk_size ? data.end() : first + container::chunk_size;

        // Copy straight into the directory slot: no temporary Chunk, and no
        // second copy of the whole directory once the loop is done.
        std::copy(first, last, this->data[entry++].begin());
        first = last;
    }

    this->size_ = data.size();
}
|
||||
|
||||
uint32_t container::size() { return this->size_; }
|
||||
|
||||
} // namespace ae
|
||||
|
||||
@ -15,22 +15,79 @@ class container {
|
||||
friend class sorter;
|
||||
|
||||
public:
|
||||
const static uint32_t chunk_size = 64;
|
||||
using element_type = std::uint64_t;
|
||||
using Chunk = std::array<element_type, chunk_size>;
|
||||
using Directory = std::vector<Chunk>;
|
||||
|
||||
// This class is a random-access iterator that enables std::ranges-based usage of the container, as if it were a single vector.
|
||||
// As I've implemented the data structure as an array-directory as presented in the lecture, random access is possible in O(1).
|
||||
// Since C++'s concept of iterators is more complex than similar implementations in Java or Rust, I've used an AI to help decipher which methods I need/should implement for a valid random access iterator.
|
||||
// The code however is written by myself.
|
||||
class Iterator {
|
||||
private:
|
||||
Directory* dir;
|
||||
size_t index;
|
||||
public:
|
||||
Iterator(Directory* dir, size_t pos) {
|
||||
this->dir = dir;
|
||||
this->index = pos;
|
||||
}
|
||||
element_type& operator*() const {
|
||||
return ((*dir)[index / chunk_size])[index % chunk_size];
|
||||
}
|
||||
element_type* operator->() const {
|
||||
return &((*dir)[index / chunk_size])[index % chunk_size];
|
||||
}
|
||||
|
||||
Iterator& operator++() { ++index; return *this; }
|
||||
Iterator operator++(int) {
|
||||
auto tmp = *this;
|
||||
++(*this);
|
||||
return tmp;
|
||||
}
|
||||
Iterator& operator--() { --index; return *this; }
|
||||
Iterator operator--(int) {
|
||||
auto tmp = *this;
|
||||
--(*this);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
Iterator& operator+=(std::ptrdiff_t n) { index += n; return *this; }
|
||||
Iterator& operator-=(std::ptrdiff_t n) { index -= n; return *this; }
|
||||
Iterator operator+(std::ptrdiff_t n) const { return Iterator(dir, index + n); }
|
||||
Iterator operator-(std::ptrdiff_t n) const { return Iterator(dir, index - n); }
|
||||
|
||||
std::ptrdiff_t operator-(const Iterator& other) const { return index - other.index; }
|
||||
element_type& operator[](std::ptrdiff_t n) const { return *(*this + n); }
|
||||
|
||||
bool operator==(const Iterator& other) const { return index == other.index; }
|
||||
bool operator!=(const Iterator& other) const { return index != other.index; }
|
||||
bool operator<=(const Iterator& other) const { return index <= other.index; }
|
||||
bool operator>=(const Iterator& other) const { return index >= other.index; }
|
||||
bool operator<(const Iterator& other) const { return index < other.index; }
|
||||
bool operator>(const Iterator& other) const { return index > other.index; }
|
||||
};
|
||||
|
||||
explicit container(std::span<const element_type> data);
|
||||
|
||||
// TODO You may also add additional functions (or data members).
|
||||
|
||||
private:
|
||||
// TODO define your data layout
|
||||
// Your datastructure should consist of multiple blocks of data, which don't
|
||||
// necessarily have to be vectors.
|
||||
std::vector<std::vector<element_type>> placeholder_;
|
||||
|
||||
// std::vector<std::vector<element_type>> placeholder_;
|
||||
std::vector<std::array<element_type, chunk_size>> data;
|
||||
uint32_t size_;
|
||||
|
||||
public:
|
||||
[[nodiscard]] auto to_view() const {
|
||||
return std::views::join(placeholder_);
|
||||
// join does not respect the boundary given by the end iterator, so we have to trim it.
|
||||
return std::views::join(data) | std::views::take(size_);
|
||||
}
|
||||
Iterator begin() { return Iterator(&data, 0); }
|
||||
Iterator end() { return Iterator(&data, size_); }
|
||||
uint32_t size();
|
||||
};
|
||||
|
||||
} // namespace ae
|
||||
|
||||
11
src/single_task_handler.cpp
Normal file
11
src/single_task_handler.cpp
Normal file
@ -0,0 +1,11 @@
|
||||
#include "single_task_handler.hpp"
|
||||
|
||||
/// Runs the task immediately on the calling thread; nothing is queued.
void SingleTaskHandler::add(std::function<void()> task) { task(); }
|
||||
/// Always 0: add() executes tasks right away, so none are ever pending.
uint32_t SingleTaskHandler::size() { return 0; }
|
||||
/// Always false: a task has already finished by the time add() returns.
bool SingleTaskHandler::isWorking() { return false; }
|
||||
8
src/single_task_handler.hpp
Normal file
8
src/single_task_handler.hpp
Normal file
@ -0,0 +1,8 @@
|
||||
#pragma once
|
||||
#include "task_handler.hpp"
|
||||
|
||||
class SingleTaskHandler: TaskHandler {
|
||||
void add(std::function<void()> task);
|
||||
uint32_t size();
|
||||
bool isWorking();
|
||||
};
|
||||
201
src/sorter.cpp
201
src/sorter.cpp
@ -1,5 +1,8 @@
|
||||
#include "sorter.hpp"
|
||||
|
||||
#include "single_task_handler.hpp"
|
||||
#include "thread_pool.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <cassert>
|
||||
@ -11,55 +14,71 @@
|
||||
|
||||
namespace ae {
|
||||
|
||||
void sorter::sort(container& data) {
|
||||
for (auto i = 1uz; i < data.placeholder_.size(); ++i) {
|
||||
std::ranges::copy(data.placeholder_[i], std::back_inserter(data.placeholder_[0]));
|
||||
data.placeholder_[i].clear();
|
||||
/// Creates a sorter that distributes its work over `num` threads.
///
/// More than one thread selects a ThreadPool; otherwise tasks are executed
/// inline through a SingleTaskHandler.
/// NOTE(review): no matching delete for `handler` is visible in this part of
/// the file — confirm where the handler is released.
sorter::sorter(uint32_t num = 1) {
    this->num_threads = num;
    // The C-style casts are required here because both handler classes
    // derive from TaskHandler privately.
    this->handler = (num > 1)
        ? (TaskHandler*) new ThreadPool(num)
        : (TaskHandler*) new SingleTaskHandler();
}
|
||||
|
||||
/// Sorts the container in place.
///
/// Starts the MSD radix sort on the whole range, using robin_hood_sort for
/// small buckets, then blocks until the task handler reports that no task is
/// queued or running.
///
/// @param data  Container whose elements are reordered.
void sorter::sort(container& data) {
    #if DEBUG
    // Reference copy, sorted with std::sort, to validate the result below.
    std::vector<container::element_type> copy;
    for (auto element : data) {
        copy.push_back(element);
    }
    auto begin = data.begin();
    for (int i = 0; i < data.size(); i++) {
        std::cerr << i << " before:" << begin[i] << std::endl;
    }
    std::sort(copy.begin(), copy.end());
    #endif

    // Keep the callable in a named local: the radix sort hands it on to tasks
    // that may still run after the call below has returned, so it must live
    // until the wait loop has finished. A temporary would be destroyed at the
    // end of the call expression.
    const std::function<void(container::Iterator, container::Iterator)> bucket_sort =
        [this](container::Iterator first, container::Iterator last) { robin_hood_sort(first, last); };

    sorter::msd_inplace_radix_sort(data.begin(), data.end(), 0, bucket_sort);

    // Spin until all queued work has drained.
    // NOTE(review): size() and isWorking() are sampled at different instants;
    // a task that enqueues follow-up work and finishes between the two reads
    // could let this loop exit early — confirm against the handler.
    while (sorter::handler->size() > 0 || sorter::handler->isWorking()) {};

    #if DEBUG
    std::cerr << "Final check if sorted" << std::endl;
    for (std::size_t i = 0; i < copy.size(); i++) {
        if (copy[i] != begin[i])
            std::cerr << i << " " << "sorted: " << copy[i] << " actual:" << begin[i] << std::endl;
    }
    #endif
}
|
||||
|
||||
void sorter::msd_inplace_radix_sort(
|
||||
std::span<container::element_type> range,
|
||||
void sorter::msd_inplace_radix_sort_binary(
|
||||
container::Iterator begin,
|
||||
container::Iterator end,
|
||||
size_t passes,
|
||||
const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
|
||||
const std::function<void(container::Iterator begin, container::Iterator end)>& bucket_sort
|
||||
) {
|
||||
if (std::begin(range) >= std::end(range)) {
|
||||
if (begin >= end) {
|
||||
return;
|
||||
}
|
||||
if (sorter::RADIX_ITERATIONS == passes) {
|
||||
switch (range.size()) {
|
||||
switch (end - begin) {
|
||||
case 1: return;
|
||||
case 2:
|
||||
if (range[0] >= range[1]) {
|
||||
std::swap(range[0], range[1]);
|
||||
if (begin[0] >= begin[1]) {
|
||||
std::swap(begin[0], begin[1]);
|
||||
}
|
||||
return;
|
||||
default:
|
||||
bucket_sort(range);
|
||||
bucket_sort(begin, end);
|
||||
return;
|
||||
}
|
||||
if (range.size() > 1) {
|
||||
bucket_sort(range);
|
||||
if (end - begin > 1) {
|
||||
bucket_sort(begin, end);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
auto lower = std::begin(range);
|
||||
auto upper = std::end(range);
|
||||
auto lower = begin;
|
||||
auto upper = end;
|
||||
while (lower < upper) {
|
||||
|
||||
if (*lower & (1L << (sizeof(container::element_type) * CHAR_BIT - passes - 1))) {
|
||||
@ -71,31 +90,139 @@ void sorter::msd_inplace_radix_sort(
|
||||
}
|
||||
}
|
||||
#if DEBUG
|
||||
std::cerr << "pass: " << passes << " begin: " << &*std::begin(range) << " end: " << &*std::end(range) << " lower: " << &*lower << std::endl;
|
||||
std::cerr << "pass: " << passes << " begin: " << &*begin << " end: " << &*end << " lower: " << &*lower << std::endl;
|
||||
#endif
|
||||
sorter::msd_inplace_radix_sort(std::span<container::element_type> (std::begin(range), lower), passes + 1, bucket_sort);
|
||||
sorter::msd_inplace_radix_sort(std::span<container::element_type> (lower, std::end(range)), passes + 1, bucket_sort);
|
||||
sorter::msd_inplace_radix_sort_binary(begin, lower, passes + 1, bucket_sort);
|
||||
sorter::msd_inplace_radix_sort_binary(lower, end, passes + 1, bucket_sort);
|
||||
}
|
||||
void sorter::robin_hood_sort(std::span<container::element_type> bucket) {
|
||||
const auto size = bucket.size() + sorter::OVERHEAD_SIZE;
|
||||
|
||||
/// In-place most-significant-digit radix sort over [begin, end).
///
/// Each call partitions the range by one digit of RADIX_SIZE bits, starting
/// at the most significant bits for passes == 0, and then hands every
/// non-empty bucket to the task handler for the next digit. Ranges of at most
/// SMALL_SORT_THRESHHOLD elements are passed to `bucket_sort` instead.
///
/// @param begin        First element of the range.
/// @param end          One past the last element of the range.
/// @param passes       Number of digits already consumed by enclosing calls.
/// @param bucket_sort  Sorter used for small ranges. It is copied into every
///                     queued task, so the caller's object does not have to
///                     outlive this call.
void sorter::msd_inplace_radix_sort(
    container::Iterator begin,
    container::Iterator end,
    size_t passes,
    const std::function<void(container::Iterator begin, container::Iterator end)>& bucket_sort
) {
    // Nothing to do for empty (or inverted) ranges.
    if (begin >= end) {
        return;
    }

    if ((end - begin) <= sorter::SMALL_SORT_THRESHHOLD) {
        bucket_sort(begin, end);
        return;
    }

    const size_t key_bits = sizeof(container::element_type) * CHAR_BIT;
    const size_t used_bits = static_cast<size_t>(sorter::RADIX_SIZE) * passes;
    if (used_bits >= key_bits) {
        // Every bit has already been used for partitioning, so all keys in
        // this range are equal and the range is sorted.
        return;
    }

    // Digit examined in this pass: the next `digit_bits` bits below the ones
    // already consumed. All arithmetic is done on the unsigned element type
    // and the shift amount is always smaller than the key width.
    const size_t remaining_bits = key_bits - used_bits;
    const size_t digit_bits = remaining_bits < sorter::RADIX_SIZE ? remaining_bits : sorter::RADIX_SIZE;
    const size_t shift = remaining_bits - digit_bits;
    const container::element_type digit_mask = (container::element_type{1} << digit_bits) - 1;
    assert(sorter::RADIX_BUCKETS >= (size_t{1} << digit_bits));

    const auto bucket_of = [shift, digit_mask](container::element_type value) {
        return static_cast<size_t>((value >> shift) & digit_mask);
    };

    // First pass: count the elements per bucket. This costs one additional
    // pass over the elements and O(buckets) additional space.
    std::vector<size_t> bucket_sizes(sorter::RADIX_BUCKETS, 0);
    for (auto element = begin; element < end; ++element) {
        ++bucket_sizes[bucket_of(*element)];
    }
    #if DEBUG
    std::cerr << "Bucket sizes: ";
    for (auto bucket : bucket_sizes) {
        std::cerr << bucket << " ";
    }
    std::cerr << std::endl;
    #endif

    // Point each bucket to its start location in the range. buckets_end is
    // the write cursor: [buckets_start[i], buckets_end[i]) is the part of
    // bucket i that already holds correctly placed elements.
    std::vector<container::Iterator> buckets_start;
    std::vector<container::Iterator> buckets_end;
    buckets_start.reserve(sorter::RADIX_BUCKETS);
    buckets_end.reserve(sorter::RADIX_BUCKETS);
    #if DEBUG
    std::cerr << "Starting bucket" << std::endl;
    #endif
    size_t count = 0;
    for (size_t i = 0; i < sorter::RADIX_BUCKETS; ++i) {
        buckets_start.push_back(begin + static_cast<std::ptrdiff_t>(count));
        buckets_end.push_back(begin + static_cast<std::ptrdiff_t>(count));
        #if DEBUG
        std::cerr << "bucket " << i << " at " << count << std::endl;
        #endif
        count += bucket_sizes[i];
    }
    #if DEBUG
    std::cerr << "finish" << std::endl;
    #endif

    // Second pass: swap every element into the filled part of its bucket.
    auto element = begin;
    while (element < end) {
        const size_t bucket = bucket_of(*element);

        if (element >= buckets_start[bucket] && element < buckets_end[bucket]) {
            // Already inside the placed part of its bucket: skip to its end.
            element = buckets_end[bucket];
        } else {
            // Not placed yet: move it to the bucket's cursor and re-examine
            // whatever was swapped into the current position.
            std::swap(*element, *buckets_end[bucket]);
            ++buckets_end[bucket];
        }
    }
    #if DEBUG
    for (int i = 0; i < end - begin; i++) {
        std::cerr << i << " reordered:" << begin[i] << std::endl;
    }
    std::cerr << "Finish reordering elements" << std::endl;
    std::cerr << "Bucket elements at begin of bucket" << std::endl;
    for (size_t i = 0; i < sorter::RADIX_BUCKETS; ++i) {
        // Only non-empty buckets have a first element to print.
        if (buckets_start[i] < buckets_end[i]) {
            std::cerr << (*buckets_start[i]) << " bucket " << bucket_of(*buckets_start[i]) << std::endl;
        }
    }
    std::cerr << std::endl;
    #endif

    // After the permutation every bucket must end where the next one starts.
    for (size_t i = 0; i + 1 < sorter::RADIX_BUCKETS; ++i) {
        assert(buckets_end[i] == buckets_start[i + 1]);
    }
    assert(buckets_end[sorter::RADIX_BUCKETS - 1] == end);
    #if DEBUG
    std::cerr << "Ranges of buckets are correct" << std::endl;
    #endif

    // Sort each non-empty bucket recursively via the task handler.
    for (size_t i = 0; i < sorter::RADIX_BUCKETS; i++) {
        const container::Iterator bucket_first = buckets_start[i];
        const container::Iterator bucket_last = buckets_end[i];
        if (bucket_first >= bucket_last) {
            continue;
        }
        #if DEBUG
        std::cerr << "Putting in task with depth " << passes << " of bucket " << i << std::endl;
        #endif
        // Everything is captured by value: the task may run after this call
        // has returned, so it must not refer to locals or to the caller's
        // bucket_sort object.
        sorter::handler->add([bucket_first, bucket_last, bucket_sort, passes, this, i]() {
            static_cast<void>(i);  // only used for the debug output below
            #if DEBUG
            std::cerr << "Starting task with depth " << passes << " of bucket " << i << std::endl;
            #endif
            sorter::msd_inplace_radix_sort(bucket_first, bucket_last, passes + 1, bucket_sort);
            #if DEBUG
            std::cerr << "Finishing task with depth " << passes << " of bucket " << i << std::endl;
            #endif
        });
    }
}
|
||||
|
||||
void sorter::robin_hood_sort(container::Iterator begin, container::Iterator end) {
|
||||
const auto size = (end - begin) + sorter::OVERHEAD_SIZE;
|
||||
const auto mask = ((1L) << (sizeof(container::element_type) * CHAR_BIT - sorter::RADIX_ITERATIONS)) - 1;
|
||||
std::vector<container::element_type> space(size, -1L);
|
||||
for (auto element : bucket) {
|
||||
auto masked_element = (element & mask);
|
||||
auto index = ((masked_element) * bucket.size()) / mask;
|
||||
for (auto element = begin; element < end; ++element) {
|
||||
auto masked_element = (*element & mask);
|
||||
auto index = ((masked_element) * (end - begin)) / mask;
|
||||
if (space[index] == -1) {
|
||||
space[index] = element;
|
||||
space[index] = *element;
|
||||
} else {
|
||||
#if DEBUG
|
||||
std::cerr << "Linear probing of " << element << " at index " << index << ". Current element " << space[index] << std::endl;
|
||||
std::cerr << "Linear probing of " << *element << " at index " << index << ". Current element " << space[index] << std::endl;
|
||||
#endif
|
||||
auto i = index;
|
||||
// linear probing
|
||||
while (i < size - 1 && space[i] != -1) {++i;};
|
||||
#if DEBUG
|
||||
std::cerr << "Inserting " << element << " at index " << i << " instead of " << index << std::endl;
|
||||
std::cerr << "Inserting " << *element << " at index " << i << " instead of " << index << std::endl;
|
||||
#endif
|
||||
space[i] = element;
|
||||
space[i] = *element;
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,8 +245,8 @@ void sorter::robin_hood_sort(std::span<container::element_type> bucket) {
|
||||
|
||||
#if DEBUG
|
||||
std::cerr << "Original\n";
|
||||
for (auto element : bucket) {
|
||||
std::cerr << element << " ";
|
||||
for (auto element = begin; element < end; ++element) {
|
||||
std::cerr << *element << " ";
|
||||
}
|
||||
std::cerr << std::endl;
|
||||
|
||||
@ -132,7 +259,7 @@ void sorter::robin_hood_sort(std::span<container::element_type> bucket) {
|
||||
|
||||
// copy data back into original range
|
||||
auto i = 0;
|
||||
for (auto element = std::begin(bucket); element < std::end(bucket); ++element) {
|
||||
for (auto element = begin; element < end; ++element) {
|
||||
*element = space[i];
|
||||
++i;
|
||||
}
|
||||
|
||||
@ -1,7 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include "container.hpp"
|
||||
#include "task_handler.hpp"
|
||||
#include "functional"
|
||||
#include "math.h"
|
||||
#include <mutex>
|
||||
|
||||
namespace ae {
|
||||
|
||||
@ -10,16 +13,38 @@ class sorter {
|
||||
void sort(container& data);
|
||||
|
||||
// TODO You may add additional functions or data members to the sorter.
|
||||
|
||||
sorter(uint32_t num_threads);
|
||||
|
||||
void msd_inplace_radix_sort(
|
||||
container::Iterator begin,
|
||||
container::Iterator end,
|
||||
size_t passes,
|
||||
const std::function<void(container::Iterator begin, container::Iterator end)>& bucket_sort
|
||||
);
|
||||
|
||||
void parallel_msd_inplace_radix_sort(
|
||||
std::span<container::element_type> range,
|
||||
size_t passes,
|
||||
const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
|
||||
);
|
||||
|
||||
const uint32_t OVERHEAD_SIZE = 10L;
|
||||
const uint32_t RADIX_ITERATIONS = 8;
|
||||
void msd_inplace_radix_sort_binary(
|
||||
container::Iterator begin,
|
||||
container::Iterator end,
|
||||
size_t passes,
|
||||
const std::function<void(container::Iterator begin, container::Iterator end)>& bucket_sort
|
||||
);
|
||||
|
||||
void robin_hood_sort(std::span<container::element_type> range);
|
||||
const uint32_t OVERHEAD_SIZE = 100L;
|
||||
const uint32_t SMALL_SORT_THRESHHOLD = 100;
|
||||
const uint32_t RADIX_SIZE = 4;
|
||||
const uint32_t RADIX_BUCKETS = std::pow(2, 4);
|
||||
const uint32_t RADIX_ITERATIONS = 8;
|
||||
uint32_t num_threads;
|
||||
TaskHandler* handler = nullptr;
|
||||
|
||||
void robin_hood_sort(container::Iterator begin, container::Iterator end);
|
||||
};
|
||||
|
||||
} // namespace ae
|
||||
|
||||
11
src/task_handler.hpp
Normal file
11
src/task_handler.hpp
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <cstdint>
|
||||
|
||||
/// Abstract interface for something that executes submitted tasks.
///
/// Implementations decide whether a task runs immediately on the calling
/// thread or is queued for worker threads.
class TaskHandler {
public:
    /// Virtual so that a handler held through a TaskHandler* can be deleted
    /// safely.
    virtual ~TaskHandler() = default;

    /// Submits a task for execution.
    virtual void add(std::function<void()> task) = 0;
    /// Number of tasks that have been submitted but not started yet.
    virtual uint32_t size() = 0;
    /// Whether at least one task is currently being executed.
    virtual bool isWorking() = 0;
};
|
||||
58
src/thread_pool.cpp
Normal file
58
src/thread_pool.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
#include "thread_pool.hpp"
|
||||
|
||||
/// Starts `num_threads` worker threads that execute queued tasks.
///
/// Each worker waits on the condition variable until a task is available or
/// the pool is stopping, marks itself busy in `states`, runs the task without
/// holding the mutex, and marks itself idle again.
///
/// @param num_threads  Number of worker threads to create.
ThreadPool::ThreadPool(size_t num_threads) {
    // Value-initialised: every worker starts in the idle (false) state.
    states = new bool[num_threads]();
    threads.reserve(num_threads);

    for (size_t i = 0; i < num_threads; ++i) {
        threads.emplace_back([this, i] {
            while (true) {
                std::function<void()> task;
                {
                    std::unique_lock<std::mutex> lock(mutex);
                    cv.wait(lock, [this] { return !tasks.empty() || stop; });

                    if (tasks.empty() || stop) {
                        return;
                    }

                    // Mark busy before the task leaves the queue, so there is
                    // no moment at which the task is neither queued nor
                    // reported as running.
                    states[i] = true;
                    task = std::move(tasks.front());
                    tasks.pop();
                }

                task();

                // The flag is shared with other threads, so it is cleared
                // under the mutex as well.
                std::lock_guard<std::mutex> lock(mutex);
                states[i] = false;
            }
        });
    }
}
|
||||
|
||||
/// Stops the pool: signals all workers, joins them, and frees the state array.
///
/// Workers return as soon as they observe `stop`, so tasks still in the queue
/// at that point are not executed.
ThreadPool::~ThreadPool() {
    {
        std::lock_guard<std::mutex> lock(mutex);
        stop = true;
    }

    cv.notify_all();
    for (auto& thread : threads) {
        if (thread.joinable()) {
            thread.join();
        }
    }

    // Release the per-thread flags only after every worker has exited, since
    // workers write to them.
    delete[] states;
}
|
||||
|
||||
void ThreadPool::add(std::function<void()> task) {
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
tasks.emplace(std::move(task));
|
||||
cv.notify_one();
|
||||
lock.unlock();
|
||||
}
|
||||
|
||||
uint32_t ThreadPool::size() { return tasks.size(); }
|
||||
bool ThreadPool::isWorking() {
|
||||
for (auto i = 0; i < threads.size(); i++) {
|
||||
if (states[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
30
src/thread_pool.hpp
Normal file
30
src/thread_pool.hpp
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include "task_handler.hpp"
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <functional>
|
||||
#include <condition_variable>
|
||||
|
||||
// This class was inspired by https://www.geeksforgeeks.org/cpp/thread-pool-in-cpp/ (access: 26/09/2025)
|
||||
// to more efficiently handle threads
|
||||
|
||||
class ThreadPool: TaskHandler {
|
||||
private:
|
||||
std::vector<std::thread> threads;
|
||||
std::queue<std::function<void()>> tasks;
|
||||
std::mutex mutex;
|
||||
std::condition_variable cv;
|
||||
bool stop = false;
|
||||
bool* states;
|
||||
|
||||
public:
|
||||
ThreadPool(size_t num_threads);
|
||||
~ThreadPool();
|
||||
void add(std::function<void()> task);
|
||||
uint32_t size();
|
||||
bool isWorking();
|
||||
};
|
||||
Loading…
x
Reference in New Issue
Block a user