diff --git a/eval.py b/eval.py index cd705cd..0d85955 100755 --- a/eval.py +++ b/eval.py @@ -9,7 +9,7 @@ def run_experiment(output_file, build_dir): # The number of threads is not currently used, it's just here in case you want to parallelize your code. for threads in [1, 2, 4, 8, 12, 16]: # for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e6 - 1, 1e7]: - for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e7]: + for size in [1e2, 1e3, 1e4 + 1, 1e5, 1e7, 1e8]: print("Measuring p=" + str(threads) + " n=" + str(size)) executable = Path(build_dir) / "Sorter" returncode = subprocess.call([executable, str(size), str(threads)], stdout=output_file) @@ -50,11 +50,11 @@ def make_plot(result_file): axs[i].set_title(f"#p={t}") for name in plots[t]: axs[i].plot(*zip(*plots[t][name]), label=name, marker='x') - axs[i].plot(*zip(*plots[t][name + " (constructor)"]), label=name + " (constructor)", marker='+') + axs[i].plot(*zip(*plots[t][name]), label=name + " (constructor)", marker='+') axs[i].set_xscale('log') else: axs.plot(*zip(*plots[t][name]), label=name, marker='x') - axs.plot(*zip(*plots[t][name + " (constructor)"]), label=name + " (constructor)", marker='+') + axs.plot(*zip(*plots[t][name]), label=name + " (constructor)", marker='+') axs.set_xscale('log') if len(plots) > 1: diff --git a/plot.pdf b/plot.pdf new file mode 100644 index 0000000..a4fb6aa Binary files /dev/null and b/plot.pdf differ diff --git a/result.b32.txt b/result.b32.txt new file mode 100644 index 0000000..f4586f8 --- /dev/null +++ b/result.b32.txt @@ -0,0 +1,36 @@ +RESULT name=sort n=100 t=1 iterations=129 durationNanoseconds=7772 totalDurationNanoseconds=1002700 constructorNanoseconds=197 totalConstructorNanoseconds=25540 +RESULT name=sort n=1000 t=1 iterations=21 durationNanoseconds=49038 totalDurationNanoseconds=1029800 constructorNanoseconds=2039 totalConstructorNanoseconds=42839 +RESULT name=sort n=10001 t=1 iterations=3 durationNanoseconds=462140 totalDurationNanoseconds=1386420 constructorNanoseconds=56730 totalConstructorNanoseconds=170190 +RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8417938 totalDurationNanoseconds=8417938 constructorNanoseconds=1179370 totalConstructorNanoseconds=1179370 +RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=874141313 totalDurationNanoseconds=874141313 constructorNanoseconds=124815857 totalConstructorNanoseconds=124815857 +RESULT name=sort n=100000000 t=1 iterations=1 durationNanoseconds=16451992962 totalDurationNanoseconds=16451992962 constructorNanoseconds=1368193059 totalConstructorNanoseconds=1368193059 +RESULT name=sort n=100 t=2 iterations=10 durationNanoseconds=104213 totalDurationNanoseconds=1042130 constructorNanoseconds=913 totalConstructorNanoseconds=9130 +RESULT name=sort n=1000 t=2 iterations=6 durationNanoseconds=169346 totalDurationNanoseconds=1016080 constructorNanoseconds=8115 totalConstructorNanoseconds=48690 +RESULT name=sort n=10001 t=2 iterations=2 durationNanoseconds=612194 totalDurationNanoseconds=1224389 constructorNanoseconds=73415 totalConstructorNanoseconds=146830 +RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=6726818 totalDurationNanoseconds=6726818 constructorNanoseconds=1205860 totalConstructorNanoseconds=1205860 +RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1233839684 totalDurationNanoseconds=1233839684 constructorNanoseconds=124416317 totalConstructorNanoseconds=124416317 +RESULT name=sort n=100000000 t=2 iterations=1 durationNanoseconds=12847945920 totalDurationNanoseconds=12847945920 constructorNanoseconds=1375436787 totalConstructorNanoseconds=1375436787 +RESULT name=sort n=100 t=4 iterations=8 durationNanoseconds=125433 totalDurationNanoseconds=1003470 constructorNanoseconds=362 totalConstructorNanoseconds=2900 +RESULT name=sort n=1000 t=4 iterations=5 durationNanoseconds=239229 totalDurationNanoseconds=1196149 constructorNanoseconds=7132 totalConstructorNanoseconds=35660 +RESULT name=sort n=10001 t=4 iterations=3 durationNanoseconds=508916 totalDurationNanoseconds=1526750 constructorNanoseconds=61393 totalConstructorNanoseconds=184180 +RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6239728 totalDurationNanoseconds=6239728 constructorNanoseconds=1209990 totalConstructorNanoseconds=1209990 +RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1181622265 totalDurationNanoseconds=1181622265 constructorNanoseconds=133041285 totalConstructorNanoseconds=133041285 +RESULT name=sort n=100000000 t=4 iterations=1 durationNanoseconds=8189890498 totalDurationNanoseconds=8189890498 constructorNanoseconds=1363428860 totalConstructorNanoseconds=1363428860 +RESULT name=sort n=100 t=8 iterations=3 durationNanoseconds=386456 totalDurationNanoseconds=1159370 constructorNanoseconds=860 totalConstructorNanoseconds=2580 +RESULT name=sort n=1000 t=8 iterations=2 durationNanoseconds=565645 totalDurationNanoseconds=1131290 constructorNanoseconds=15305 totalConstructorNanoseconds=30610 +RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=715080 totalDurationNanoseconds=1430160 constructorNanoseconds=72435 totalConstructorNanoseconds=144870 +RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=5976559 totalDurationNanoseconds=5976559 constructorNanoseconds=1178210 totalConstructorNanoseconds=1178210 +RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1166965728 totalDurationNanoseconds=1166965728 constructorNanoseconds=124620786 totalConstructorNanoseconds=124620786 +RESULT name=sort n=100000000 t=8 iterations=1 durationNanoseconds=7031099529 totalDurationNanoseconds=7031099529 constructorNanoseconds=1382599627 totalConstructorNanoseconds=1382599627 +RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=411886 totalDurationNanoseconds=1235660 constructorNanoseconds=756 totalConstructorNanoseconds=2270 +RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=874880 totalDurationNanoseconds=1749760 constructorNanoseconds=13010 totalConstructorNanoseconds=26020 +RESULT name=sort n=10001 t=12 iterations=2 durationNanoseconds=1048975 totalDurationNanoseconds=2097950 constructorNanoseconds=68755 totalConstructorNanoseconds=137510 +RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=5733409 totalDurationNanoseconds=5733409 constructorNanoseconds=1157440 totalConstructorNanoseconds=1157440 +RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1212925449 totalDurationNanoseconds=1212925449 constructorNanoseconds=128551595 totalConstructorNanoseconds=128551595 +RESULT name=sort n=100000000 t=12 iterations=1 durationNanoseconds=7789121434 totalDurationNanoseconds=7789121434 constructorNanoseconds=1402616603 totalConstructorNanoseconds=1402616603 +RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=429000 totalDurationNanoseconds=1287000 constructorNanoseconds=1326 totalConstructorNanoseconds=3980 +RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=908530 totalDurationNanoseconds=1817060 constructorNanoseconds=9385 totalConstructorNanoseconds=18770 +RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1256210 totalDurationNanoseconds=1256210 constructorNanoseconds=102130 totalConstructorNanoseconds=102130 +RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6508428 totalDurationNanoseconds=6508428 constructorNanoseconds=1205470 totalConstructorNanoseconds=1205470 +RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1348730723 totalDurationNanoseconds=1348730723 constructorNanoseconds=126286186 totalConstructorNanoseconds=126286186 +RESULT name=sort n=100000000 t=16 iterations=1 durationNanoseconds=7396974089 totalDurationNanoseconds=7396974089 constructorNanoseconds=1374412728 totalConstructorNanoseconds=1374412728 diff --git a/result.txt b/result.txt index 02e6f78..abd4f28 100644 --- a/result.txt +++ b/result.txt @@ -1,30 +1,36 @@ -RESULT name=sort n=100 t=1 iterations=95 durationNanoseconds=10561 totalDurationNanoseconds=1003361 constructorNanoseconds=460 totalConstructorNanoseconds=43740 -RESULT name=sort n=1000 t=1 iterations=22 durationNanoseconds=46203 totalDurationNanoseconds=1016480 constructorNanoseconds=764 totalConstructorNanoseconds=16820 -RESULT name=sort n=10001 t=1 iterations=3 durationNanoseconds=459633 totalDurationNanoseconds=1378901 constructorNanoseconds=18656 totalConstructorNanoseconds=55970 -RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8231415 totalDurationNanoseconds=8231415 constructorNanoseconds=514381 totalConstructorNanoseconds=514381 -RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=744634713 totalDurationNanoseconds=744634713 constructorNanoseconds=51303216 totalConstructorNanoseconds=51303216 -RESULT name=sort n=100 t=2 iterations=18 durationNanoseconds=58551 totalDurationNanoseconds=1053931 constructorNanoseconds=361 totalConstructorNanoseconds=6510 -RESULT name=sort n=1000 t=2 iterations=9 durationNanoseconds=120015 totalDurationNanoseconds=1080141 constructorNanoseconds=1345 totalConstructorNanoseconds=12110 -RESULT name=sort n=10001 t=2 iterations=3 durationNanoseconds=495777 totalDurationNanoseconds=1487331 constructorNanoseconds=19473 totalConstructorNanoseconds=58420 -RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=7827986 totalDurationNanoseconds=7827986 constructorNanoseconds=508510 totalConstructorNanoseconds=508510 -RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1296871738 totalDurationNanoseconds=1296871738 constructorNanoseconds=49085315 totalConstructorNanoseconds=49085315 -RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118596 totalDurationNanoseconds=1067371 constructorNanoseconds=455 totalConstructorNanoseconds=4100 -RESULT name=sort n=1000 t=4 iterations=5 durationNanoseconds=220166 totalDurationNanoseconds=1100831 constructorNanoseconds=2204 totalConstructorNanoseconds=11020 -RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=502925 totalDurationNanoseconds=1005851 constructorNanoseconds=29945 totalConstructorNanoseconds=59890 -RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=5620814 totalDurationNanoseconds=5620814 constructorNanoseconds=528570 totalConstructorNanoseconds=528570 -RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1198704968 totalDurationNanoseconds=1198704968 constructorNanoseconds=50132516 totalConstructorNanoseconds=50132516 -RESULT name=sort n=100 t=8 iterations=5 durationNanoseconds=222358 totalDurationNanoseconds=1111791 constructorNanoseconds=1116 totalConstructorNanoseconds=5580 -RESULT name=sort n=1000 t=8 iterations=3 durationNanoseconds=457883 totalDurationNanoseconds=1373650 constructorNanoseconds=3950 totalConstructorNanoseconds=11851 -RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=705680 totalDurationNanoseconds=1411361 constructorNanoseconds=30140 totalConstructorNanoseconds=60280 -RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=6234564 totalDurationNanoseconds=6234564 constructorNanoseconds=530141 totalConstructorNanoseconds=530141 -RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1219668902 totalDurationNanoseconds=1219668902 constructorNanoseconds=50095186 totalConstructorNanoseconds=50095186 -RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=336683 totalDurationNanoseconds=1010050 constructorNanoseconds=1553 totalConstructorNanoseconds=4660 -RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=709845 totalDurationNanoseconds=1419691 constructorNanoseconds=3685 totalConstructorNanoseconds=7370 -RESULT name=sort n=10001 t=12 iterations=1 durationNanoseconds=1008971 totalDurationNanoseconds=1008971 constructorNanoseconds=45810 totalConstructorNanoseconds=45810 -RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=4991544 totalDurationNanoseconds=4991544 constructorNanoseconds=494530 totalConstructorNanoseconds=494530 -RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1295272477 totalDurationNanoseconds=1295272477 constructorNanoseconds=50175095 totalConstructorNanoseconds=50175095 -RESULT name=sort n=100 t=16 iterations=2 durationNanoseconds=668925 totalDurationNanoseconds=1337851 constructorNanoseconds=3290 totalConstructorNanoseconds=6580 -RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=828375 totalDurationNanoseconds=1656751 constructorNanoseconds=5660 totalConstructorNanoseconds=11320 -RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1319491 totalDurationNanoseconds=1319491 constructorNanoseconds=65160 totalConstructorNanoseconds=65160 -RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=7145805 totalDurationNanoseconds=7145805 constructorNanoseconds=529550 totalConstructorNanoseconds=529550 -RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1483766932 totalDurationNanoseconds=1483766932 constructorNanoseconds=52444497 totalConstructorNanoseconds=52444497 +RESULT name=sort n=100 t=1 iterations=181 durationNanoseconds=5530 totalDurationNanoseconds=1001080 constructorNanoseconds=166 totalConstructorNanoseconds=30180 +RESULT name=sort n=1000 t=1 iterations=20 durationNanoseconds=50209 totalDurationNanoseconds=1004180 constructorNanoseconds=2275 totalConstructorNanoseconds=45500 +RESULT name=sort n=10001 t=1 iterations=2 durationNanoseconds=551520 totalDurationNanoseconds=1103040 constructorNanoseconds=78160 totalConstructorNanoseconds=156320 +RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8726749 totalDurationNanoseconds=8726749 constructorNanoseconds=1262160 totalConstructorNanoseconds=1262160 +RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=868109014 totalDurationNanoseconds=868109014 constructorNanoseconds=129001816 totalConstructorNanoseconds=129001816 +RESULT name=sort n=100000000 t=1 iterations=1 durationNanoseconds=16587735066 totalDurationNanoseconds=16587735066 constructorNanoseconds=1384105216 totalConstructorNanoseconds=1384105216 +RESULT name=sort n=100 t=2 iterations=13 durationNanoseconds=77370 totalDurationNanoseconds=1005810 constructorNanoseconds=370 totalConstructorNanoseconds=4820 +RESULT name=sort n=1000 t=2 iterations=8 durationNanoseconds=127625 totalDurationNanoseconds=1021000 constructorNanoseconds=5151 totalConstructorNanoseconds=41210 +RESULT name=sort n=10001 t=2 iterations=2 durationNanoseconds=648160 totalDurationNanoseconds=1296320 constructorNanoseconds=66385 totalConstructorNanoseconds=132770 +RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=7803549 totalDurationNanoseconds=7803549 constructorNanoseconds=1158910 totalConstructorNanoseconds=1158910 +RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1418380159 totalDurationNanoseconds=1418380159 constructorNanoseconds=131211135 totalConstructorNanoseconds=131211135 +RESULT name=sort n=100000000 t=2 iterations=1 durationNanoseconds=12091467754 totalDurationNanoseconds=12091467754 constructorNanoseconds=1417232259 totalConstructorNanoseconds=1417232259 +RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118352 totalDurationNanoseconds=1065170 constructorNanoseconds=611 totalConstructorNanoseconds=5500 +RESULT name=sort n=1000 t=4 iterations=3 durationNanoseconds=359543 totalDurationNanoseconds=1078630 constructorNanoseconds=17990 totalConstructorNanoseconds=53970 +RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=814465 totalDurationNanoseconds=1628930 constructorNanoseconds=134305 totalConstructorNanoseconds=268610 +RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=6957149 totalDurationNanoseconds=6957149 constructorNanoseconds=1242780 totalConstructorNanoseconds=1242780 +RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1117816157 totalDurationNanoseconds=1117816157 constructorNanoseconds=131642965 totalConstructorNanoseconds=131642965 +RESULT name=sort n=100000000 t=4 iterations=1 durationNanoseconds=8908347671 totalDurationNanoseconds=8908347671 constructorNanoseconds=1293569273 totalConstructorNanoseconds=1293569273 +RESULT name=sort n=100 t=8 iterations=4 durationNanoseconds=262700 totalDurationNanoseconds=1050800 constructorNanoseconds=660 totalConstructorNanoseconds=2640 +RESULT name=sort n=1000 t=8 iterations=1 durationNanoseconds=1083510 totalDurationNanoseconds=1083510 constructorNanoseconds=23680 totalConstructorNanoseconds=23680 +RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=774170 totalDurationNanoseconds=1548340 constructorNanoseconds=78415 totalConstructorNanoseconds=156830 +RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=5745139 totalDurationNanoseconds=5745139 constructorNanoseconds=1267620 totalConstructorNanoseconds=1267620 +RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1254361820 totalDurationNanoseconds=1254361820 constructorNanoseconds=132939165 totalConstructorNanoseconds=132939165 +RESULT name=sort n=100000000 t=8 iterations=1 durationNanoseconds=7138660668 totalDurationNanoseconds=7138660668 constructorNanoseconds=1298459682 totalConstructorNanoseconds=1298459682 +RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=345353 totalDurationNanoseconds=1036060 constructorNanoseconds=1346 totalConstructorNanoseconds=4040 +RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=587034 totalDurationNanoseconds=1174069 constructorNanoseconds=13240 totalConstructorNanoseconds=26480 +RESULT name=sort n=10001 t=12 iterations=2 durationNanoseconds=894959 totalDurationNanoseconds=1789919 constructorNanoseconds=73220 totalConstructorNanoseconds=146440 +RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=5794189 totalDurationNanoseconds=5794189 constructorNanoseconds=1217119 totalConstructorNanoseconds=1217119 +RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1233781405 totalDurationNanoseconds=1233781405 constructorNanoseconds=132758054 totalConstructorNanoseconds=132758054 +RESULT name=sort n=100000000 t=12 iterations=1 durationNanoseconds=7069741771 totalDurationNanoseconds=7069741771 constructorNanoseconds=1331710866 totalConstructorNanoseconds=1331710866 +RESULT name=sort n=100 t=16 iterations=3 durationNanoseconds=420606 totalDurationNanoseconds=1261819 constructorNanoseconds=1360 totalConstructorNanoseconds=4080 +RESULT name=sort n=1000 t=16 iterations=1 durationNanoseconds=1215220 totalDurationNanoseconds=1215220 constructorNanoseconds=20870 totalConstructorNanoseconds=20870 +RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1243860 totalDurationNanoseconds=1243860 constructorNanoseconds=113500 totalConstructorNanoseconds=113500 +RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=6044489 totalDurationNanoseconds=6044489 constructorNanoseconds=1216550 totalConstructorNanoseconds=1216550 +RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1294393674 totalDurationNanoseconds=1294393674 constructorNanoseconds=132577214 totalConstructorNanoseconds=132577214 +RESULT name=sort n=100000000 t=16 iterations=1 durationNanoseconds=7428194523 totalDurationNanoseconds=7428194523 constructorNanoseconds=1311964740 totalConstructorNanoseconds=1311964740 diff --git a/result.withoutbitset.txt b/result.withoutbitset.txt new file mode 100644 index 0000000..02e6f78 --- /dev/null +++ b/result.withoutbitset.txt @@ -0,0 +1,30 @@ +RESULT name=sort n=100 t=1 iterations=95 durationNanoseconds=10561 totalDurationNanoseconds=1003361 constructorNanoseconds=460 totalConstructorNanoseconds=43740 +RESULT name=sort n=1000 t=1 iterations=22 durationNanoseconds=46203 totalDurationNanoseconds=1016480 constructorNanoseconds=764 totalConstructorNanoseconds=16820 +RESULT name=sort n=10001 t=1 iterations=3 durationNanoseconds=459633 totalDurationNanoseconds=1378901 constructorNanoseconds=18656 totalConstructorNanoseconds=55970 +RESULT name=sort n=100000 t=1 iterations=1 durationNanoseconds=8231415 totalDurationNanoseconds=8231415 constructorNanoseconds=514381 totalConstructorNanoseconds=514381 +RESULT name=sort n=10000000 t=1 iterations=1 durationNanoseconds=744634713 totalDurationNanoseconds=744634713 constructorNanoseconds=51303216 totalConstructorNanoseconds=51303216 +RESULT name=sort n=100 t=2 iterations=18 durationNanoseconds=58551 totalDurationNanoseconds=1053931 constructorNanoseconds=361 totalConstructorNanoseconds=6510 +RESULT name=sort n=1000 t=2 iterations=9 durationNanoseconds=120015 totalDurationNanoseconds=1080141 constructorNanoseconds=1345 totalConstructorNanoseconds=12110 +RESULT name=sort n=10001 t=2 iterations=3 durationNanoseconds=495777 totalDurationNanoseconds=1487331 constructorNanoseconds=19473 totalConstructorNanoseconds=58420 +RESULT name=sort n=100000 t=2 iterations=1 durationNanoseconds=7827986 totalDurationNanoseconds=7827986 constructorNanoseconds=508510 totalConstructorNanoseconds=508510 +RESULT name=sort n=10000000 t=2 iterations=1 durationNanoseconds=1296871738 totalDurationNanoseconds=1296871738 constructorNanoseconds=49085315 totalConstructorNanoseconds=49085315 +RESULT name=sort n=100 t=4 iterations=9 durationNanoseconds=118596 totalDurationNanoseconds=1067371 constructorNanoseconds=455 totalConstructorNanoseconds=4100 +RESULT name=sort n=1000 t=4 iterations=5 durationNanoseconds=220166 totalDurationNanoseconds=1100831 constructorNanoseconds=2204 totalConstructorNanoseconds=11020 +RESULT name=sort n=10001 t=4 iterations=2 durationNanoseconds=502925 totalDurationNanoseconds=1005851 constructorNanoseconds=29945 totalConstructorNanoseconds=59890 +RESULT name=sort n=100000 t=4 iterations=1 durationNanoseconds=5620814 totalDurationNanoseconds=5620814 constructorNanoseconds=528570 totalConstructorNanoseconds=528570 +RESULT name=sort n=10000000 t=4 iterations=1 durationNanoseconds=1198704968 totalDurationNanoseconds=1198704968 constructorNanoseconds=50132516 totalConstructorNanoseconds=50132516 +RESULT name=sort n=100 t=8 iterations=5 durationNanoseconds=222358 totalDurationNanoseconds=1111791 constructorNanoseconds=1116 totalConstructorNanoseconds=5580 +RESULT name=sort n=1000 t=8 iterations=3 durationNanoseconds=457883 totalDurationNanoseconds=1373650 constructorNanoseconds=3950 totalConstructorNanoseconds=11851 +RESULT name=sort n=10001 t=8 iterations=2 durationNanoseconds=705680 totalDurationNanoseconds=1411361 constructorNanoseconds=30140 totalConstructorNanoseconds=60280 +RESULT name=sort n=100000 t=8 iterations=1 durationNanoseconds=6234564 totalDurationNanoseconds=6234564 constructorNanoseconds=530141 totalConstructorNanoseconds=530141 +RESULT name=sort n=10000000 t=8 iterations=1 durationNanoseconds=1219668902 totalDurationNanoseconds=1219668902 constructorNanoseconds=50095186 totalConstructorNanoseconds=50095186 +RESULT name=sort n=100 t=12 iterations=3 durationNanoseconds=336683 totalDurationNanoseconds=1010050 constructorNanoseconds=1553 totalConstructorNanoseconds=4660 +RESULT name=sort n=1000 t=12 iterations=2 durationNanoseconds=709845 totalDurationNanoseconds=1419691 constructorNanoseconds=3685 totalConstructorNanoseconds=7370 +RESULT name=sort n=10001 t=12 iterations=1 durationNanoseconds=1008971 totalDurationNanoseconds=1008971 constructorNanoseconds=45810 totalConstructorNanoseconds=45810 +RESULT name=sort n=100000 t=12 iterations=1 durationNanoseconds=4991544 totalDurationNanoseconds=4991544 constructorNanoseconds=494530 totalConstructorNanoseconds=494530 +RESULT name=sort n=10000000 t=12 iterations=1 durationNanoseconds=1295272477 totalDurationNanoseconds=1295272477 constructorNanoseconds=50175095 totalConstructorNanoseconds=50175095 +RESULT name=sort n=100 t=16 iterations=2 durationNanoseconds=668925 totalDurationNanoseconds=1337851 constructorNanoseconds=3290 totalConstructorNanoseconds=6580 +RESULT name=sort n=1000 t=16 iterations=2 durationNanoseconds=828375 totalDurationNanoseconds=1656751 constructorNanoseconds=5660 totalConstructorNanoseconds=11320 +RESULT name=sort n=10001 t=16 iterations=1 durationNanoseconds=1319491 totalDurationNanoseconds=1319491 constructorNanoseconds=65160 totalConstructorNanoseconds=65160 +RESULT name=sort n=100000 t=16 iterations=1 durationNanoseconds=7145805 totalDurationNanoseconds=7145805 constructorNanoseconds=529550 totalConstructorNanoseconds=529550 +RESULT name=sort n=10000000 t=16 iterations=1 durationNanoseconds=1483766932 totalDurationNanoseconds=1483766932 constructorNanoseconds=52444497 totalConstructorNanoseconds=52444497 diff --git a/src/container.cpp b/src/container.cpp index cd6c694..4022f78 100755 --- a/src/container.cpp +++ b/src/container.cpp @@ -2,22 +2,28 @@ #include #include +#include namespace ae { container::container(std::span data) { - // TODO create your datastructure from the given data - - // The code below is a simple example splitting the data into 16 blocks, - // but you may find other options better suited for your sorting algorithm. - constexpr std::size_t num_blocks = 16; - const std::ptrdiff_t elements_per_block = (data.size() + num_blocks - 1) / num_blocks; + // Reserve enough space for the chunks to avoid relocation while building the structure + container::Directory dir(data.size() / container::chunk_size + 1); + auto entry = 0; for (auto first = data.begin(); first < data.end();) { - const auto last = (data.end() - first) < elements_per_block ? data.end() : first + elements_per_block; - placeholder_.emplace_back(first, last); + const auto last = (data.end() - first) < container::chunk_size ? data.end() : first + container::chunk_size; + Chunk chunk; + + // This could be improved by just pointing dir[entry] to first, removing the copy process. + std::copy(first, last, chunk.begin()); + dir[entry++] = chunk; first = last; } + this->data = dir; + this->size_ = data.size(); } +uint32_t container::size() { return this->size_; } + } // namespace ae diff --git a/src/container.hpp b/src/container.hpp index 92d9d4e..65a2f55 100755 --- a/src/container.hpp +++ b/src/container.hpp @@ -15,22 +15,79 @@ class container { friend class sorter; public: + const static uint32_t chunk_size = 64; using element_type = std::uint64_t; + using Chunk = std::array; + using Directory = std::vector; + + // This class represents a random access iterator to enable std::range based usage similar to a single vector. + // As I've implemented the data structure as an array-directory as presented in the lecture, random access is possible in O(1). + // Since C++'s concept of iterators is more complex than similar implementations in java or rust, I've used an AI to help decipher which methods I need/should implement for a valid random access iterator. + // The code however is written by myself. + class Iterator { + private: + Directory* dir; + size_t index; + public: + Iterator(Directory* dir, size_t pos) { + this->dir = dir; + this->index = pos; + } + element_type& operator*() const { + return ((*dir)[index / chunk_size])[index % chunk_size]; + } + element_type* operator->() const { + return &((*dir)[index / chunk_size])[index % chunk_size]; + } + + Iterator& operator++() { ++index; return *this; } + Iterator operator++(int) { + auto tmp = *this; + ++(*this); + return tmp; + } + Iterator& operator--() { --index; return *this; } + Iterator operator--(int) { + auto tmp = *this; + --(*this); + return tmp; + } + + Iterator& operator+=(std::ptrdiff_t n) { index += n; return *this; } + Iterator& operator-=(std::ptrdiff_t n) { index -= n; return *this; } + Iterator operator+(std::ptrdiff_t n) const { return Iterator(dir, index + n); } + Iterator operator-(std::ptrdiff_t n) const { return Iterator(dir, index - n); } + + std::ptrdiff_t operator-(const Iterator& other) const { return index - other.index; } + element_type& operator[](std::ptrdiff_t n) const { return *(*this + n); } + + bool operator==(const Iterator& other) const { return index == other.index; } + bool operator!=(const Iterator& other) const { return index != other.index; } + bool operator<=(const Iterator& other) const { return index <= other.index; } + bool operator>=(const Iterator& other) const { return index >= other.index; } + bool operator<(const Iterator& other) const { return index < other.index; } + bool operator>(const Iterator& other) const { return index > other.index; } + }; explicit container(std::span data); - // TODO You may also add additional functions (or data members). - private: // TODO define your data layout // Your datastructure should consist of multiple blocks of data, which don't // necessarily have to be vectors. - std::vector> placeholder_; + + // std::vector> placeholder_; + std::vector> data; + uint32_t size_; public: - [[nodiscard]] auto to_view() const { - return std::views::join(placeholder_); + [[nodiscard]] auto to_view() const { + // join does not respect the boundary given by the end iterator, so we have to trim it. + return std::views::join(data) | std::views::take(size_); } + Iterator begin() { return Iterator(&data, 0); } + Iterator end() { return Iterator(&data, size_); } + uint32_t size(); }; } // namespace ae diff --git a/src/sorter.cpp b/src/sorter.cpp index 5066f28..c971ccd 100755 --- a/src/sorter.cpp +++ b/src/sorter.cpp @@ -24,58 +24,61 @@ sorter::sorter(uint32_t num = 1) { } void sorter::sort(container& data) { - for (auto i = 1uz; i < data.placeholder_.size(); ++i) { - std::ranges::copy(data.placeholder_[i], std::back_inserter(data.placeholder_[0])); - data.placeholder_[i].clear(); - } + // for (auto i = 1uz; i < data.placeholder_.size(); ++i) { + // std::ranges::copy(data.placeholder_[i], std::back_inserter(data.placeholder_[0])); + // data.placeholder_[i].clear(); + // } #if DEBUG - for (int i = 0; i < data.placeholder_[0].size(); i++) { - if (copy[i] != data.placeholder_[0][i]) - std::cerr << i << " before:" << data.placeholder_[0][i] << std::endl; - } - std::vector copy; - std::ranges::copy(data.placeholder_[0], std::back_inserter(copy)); + for (auto element : data) { + copy.push_back(element); + } + auto begin = data.begin(); + for (int i = 0; i < data.size(); i++) { + std::cerr << i << " before:" << begin[i] << std::endl; + } std::sort(copy.begin(), copy.end()); #endif - sorter::msd_inplace_radix_sort(data.placeholder_[0], 0, [&](auto span) {sorter::robin_hood_sort(span);}); + sorter::msd_inplace_radix_sort(data.begin(), data.end(), 0, [&](auto begin, auto end) {sorter::robin_hood_sort(begin, end);}); while (sorter::handler->size() > 0 || sorter::handler->isWorking()) {}; #if DEBUG + std::cerr << "Final check if sorted" << std::endl; for (int i = 0; i < copy.size(); i++) { - if (copy[i] != data.placeholder_[0][i]) - std::cerr << i << " " << "sorted: " << copy[i] << " actual:" << data.placeholder_[0][i] << std::endl; + if (copy[i] != begin[i]) + std::cerr << i << " " << "sorted: " << copy[i] << " actual:" << begin[i] << std::endl; } #endif } void sorter::msd_inplace_radix_sort_binary( - std::span range, + container::Iterator begin, + container::Iterator end, size_t passes, - const std::function bucket)>& bucket_sort + const std::function& bucket_sort ) { - if (std::begin(range) >= std::end(range)) { + if (begin >= end) { return; } if (sorter::RADIX_ITERATIONS == passes) { - switch (range.size()) { + switch (end - begin) { case 1: return; case 2: - if (range[0] >= range[1]) { - std::swap(range[0], range[1]); + if (begin[0] >= begin[1]) { + std::swap(begin[0], begin[1]); } return; default: - bucket_sort(range); + bucket_sort(begin, end); return; } - if (range.size() > 1) { - bucket_sort(range); + if (end - begin > 1) { + bucket_sort(begin, end); } return; } - auto lower = std::begin(range); - auto upper = std::end(range); + auto lower = begin; + auto upper = end; while (lower < upper) { if (*lower & (1L << (sizeof(container::element_type) * CHAR_BIT - passes - 1))) { @@ -87,23 +90,24 @@ void sorter::msd_inplace_radix_sort_binary( } } #if DEBUG - std::cerr << "pass: " << passes << " begin: " << &*std::begin(range) << " end: " << &*std::end(range) << " lower: " << &*lower << std::endl; + std::cerr << "pass: " << passes << " begin: " << &*begin << " end: " << &*end << " lower: " << &*lower << std::endl; #endif - sorter::msd_inplace_radix_sort_binary(std::span (std::begin(range), lower), passes + 1, bucket_sort); - sorter::msd_inplace_radix_sort_binary(std::span (lower, std::end(range)), passes + 1, bucket_sort); + sorter::msd_inplace_radix_sort_binary(begin, lower, passes + 1, bucket_sort); + sorter::msd_inplace_radix_sort_binary(lower, end, passes + 1, bucket_sort); } void sorter::msd_inplace_radix_sort( - std::span range, + container::Iterator begin, + container::Iterator end, size_t passes, - const std::function bucket)>& bucket_sort + const std::function& bucket_sort ) { - if (std::begin(range) >= std::end(range)) { + if (begin > end) { return; } - if (range.size() <= sorter::SMALL_SORT_THRESHHOLD) { - bucket_sort(range); + if ((end - begin) <= sorter::SMALL_SORT_THRESHHOLD) { + bucket_sort(begin, end); return; } @@ -114,8 +118,8 @@ void sorter::msd_inplace_radix_sort( auto mask_bucket = [&](container::element_type* element){ return (*element & upper_bucket_mask) >> (sizeof(container::element_type) * CHAR_BIT - sorter::RADIX_SIZE) * (1 + passes); }; - for (auto element : range) { - auto bucket = mask_bucket(&element); + for (auto element = begin; element < end; ++element) { + auto bucket = mask_bucket(&*element); bucket_sizes[bucket]++; } #if DEBUG @@ -127,15 +131,15 @@ void sorter::msd_inplace_radix_sort( #endif // We now point each bucket to its start location in the range - container::element_type* buckets_end[sorter::RADIX_BUCKETS]; - container::element_type* buckets_start[sorter::RADIX_BUCKETS]; + container::Iterator* buckets_end[sorter::RADIX_BUCKETS]; + container::Iterator* buckets_start[sorter::RADIX_BUCKETS]; #if DEBUG std::cerr << "Starting bucket" << std::endl; #endif auto count = 0; for (int i = 0; i < sorter::RADIX_BUCKETS; ++i) { - buckets_end[i] = &range[count]; - buckets_start[i] = &range[count]; + buckets_end[i] = new container::Iterator(begin + count); + buckets_start[i] = new container::Iterator(begin + count); #if DEBUG std::cerr << "bucket " << i << " at " << count << std::endl; #endif @@ -146,36 +150,36 @@ void sorter::msd_inplace_radix_sort( #endif // Loop over the elements and swap them into the correct buckets. // This will look at each element exactly once. - auto element = &range[0]; - while (element < &*std::end(range)) { - uint32_t bucket = mask_bucket(element); + auto element = begin; + while (element < end) { + uint32_t bucket = mask_bucket(&*element); // Check if we are currently in the bounds of the corresponding bucket - if (&*element >= buckets_start[bucket] && &*element < buckets_end[bucket]) { + if (element >= *buckets_start[bucket] && element < *buckets_end[bucket]) { // The element is in the correct bucket, we skip to the end of the bucket - element = buckets_end[bucket]; + element = *buckets_end[bucket]; } else { // The element is not in the correct bucket; swap - std::swap(*element, *buckets_end[bucket]); - buckets_end[bucket]++; + std::swap(*element, **buckets_end[bucket]); + (*buckets_end[bucket])++; } } #if DEBUG - for (int i = 0; i < range.size(); i++) { - std::cerr << i << " reordered:" << range[i] << std::endl; + for (int i = 0; i < end - begin; i++) { + std::cerr << i << " reordered:" << begin[i] << std::endl; } std::cerr << "Finish reordering elements" << std::endl; std::cerr << "Bucket elements at begin of bucket" << std::endl; for (auto bucket : buckets_start) { - std::cerr << *bucket << " bucket " << mask_bucket(bucket) << std::endl; + std::cerr << (**bucket) << " bucket " << mask_bucket(&**bucket) << std::endl; } std::cerr << std::endl; #endif for (auto i = 0; i < sorter::RADIX_BUCKETS - 1; ++i) { - assert(buckets_end[i] == buckets_start[i + 1]); + assert(*buckets_end[i] == *buckets_start[i + 1]); } - assert(buckets_end[sorter::RADIX_BUCKETS - 1] == &*std::end(range)); + assert(*buckets_end[sorter::RADIX_BUCKETS - 1] == end); #if DEBUG std::cerr << "Ranges of buckets are correct" << std::endl; #endif @@ -191,7 +195,7 @@ void sorter::msd_inplace_radix_sort( #if DEBUG std::cerr << "Starting task with depth " << passes << " of bucket " << i << std::endl; #endif - sorter::msd_inplace_radix_sort(std::span (start, end), passes + 1, bucket_sort); + sorter::msd_inplace_radix_sort(*start, *end, passes + 1, bucket_sort); #if DEBUG std::cerr << "Finishing task with depth " << passes << " of bucket " << i << std::endl; #endif @@ -199,26 +203,26 @@ void sorter::msd_inplace_radix_sort( } } -void sorter::robin_hood_sort(std::span bucket) { - const auto size = bucket.size() + sorter::OVERHEAD_SIZE; +void sorter::robin_hood_sort(container::Iterator begin, container::Iterator end) { + const auto size = (end - begin) + sorter::OVERHEAD_SIZE; const auto mask = ((1L) << (sizeof(container::element_type) * CHAR_BIT - sorter::RADIX_ITERATIONS)) - 1; std::vector space(size, -1L); - for (auto element : bucket) { - auto masked_element = (element & mask); - auto index = ((masked_element) * bucket.size()) / mask; + for (auto element = begin; element < end; ++element) { + auto masked_element = (*element & mask); + auto index = ((masked_element) * (end - begin)) / mask; if (space[index] == -1) { - space[index] = element; + space[index] = *element; } else { #if DEBUG - std::cerr << "Linear probing of " << element << " at index " << index << ". Current element " << space[index] << std::endl; + std::cerr << "Linear probing of " << *element << " at index " << index << ". Current element " << space[index] << std::endl; #endif auto i = index; // linear probing while (i < size - 1 && space[i] != -1) {++i;}; #if DEBUG - std::cerr << "Inserting " << element << " at index " << i << " instead of " << index << std::endl; + std::cerr << "Inserting " << *element << " at index " << i << " instead of " << index << std::endl; #endif - space[i] = element; + space[i] = *element; } } @@ -241,8 +245,8 @@ void sorter::robin_hood_sort(std::span bucket) { #if DEBUG std::cerr << "Original\n"; - for (auto element : bucket) { - std::cerr << element << " "; + for (auto element = begin; element < end; ++element) { + std::cerr << *element << " "; } std::cerr << std::endl; @@ -255,7 +259,7 @@ void sorter::robin_hood_sort(std::span bucket) { // copy data back into original range auto i = 0; - for (auto element = std::begin(bucket); element < std::end(bucket); ++element) { + for (auto element = begin; element < end; ++element) { *element = space[i]; ++i; } diff --git a/src/sorter.hpp b/src/sorter.hpp index f395283..be42e8b 100755 --- a/src/sorter.hpp +++ b/src/sorter.hpp @@ -17,9 +17,10 @@ class sorter { sorter(uint32_t num_threads); void msd_inplace_radix_sort( - std::span range, + container::Iterator begin, + container::Iterator end, size_t passes, - const std::function bucket)>& bucket_sort + const std::function& bucket_sort ); void parallel_msd_inplace_radix_sort( @@ -29,9 +30,10 @@ class sorter { ); void msd_inplace_radix_sort_binary( - std::span range, + container::Iterator begin, + container::Iterator end, size_t passes, - const std::function bucket)>& bucket_sort + const std::function& bucket_sort ); const uint32_t OVERHEAD_SIZE = 100L; @@ -42,7 +44,7 @@ class sorter { uint32_t num_threads; TaskHandler* handler = nullptr; - void robin_hood_sort(std::span range); + void robin_hood_sort(container::Iterator begin, container::Iterator end); }; } // namespace ae