#include "sorter.hpp"

#include <algorithm>
#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <iterator>
#include <limits>
#include <span>
#include <type_traits>
#include <utility>
#include <vector>

#define DEBUG false

namespace ae {

namespace {

/// Element type stored in the container being sorted.
using elem_t = container::element_type;

/// Unsigned view of an element. All radix/bit logic in this file orders
/// elements by their raw bit pattern, i.e. by this unsigned value.
using ukey_t = std::make_unsigned_t<elem_t>;

/// Number of bits in one element.
constexpr std::size_t KEY_BITS = sizeof(elem_t) * CHAR_BIT;

/// Reinterprets an element as its unsigned sort key.
constexpr ukey_t to_key(elem_t value) noexcept {
    return static_cast<ukey_t>(value);
}

}  // namespace

/// Merges all sub-vectors of `data.placeholder_` into `data.placeholder_[0]`
/// (clearing the others) and sorts the merged vector in place with the MSD
/// radix sort, using `robin_hood_sort` for small buckets.
///
/// @param data container whose `placeholder_` vectors are merged and sorted.
void sorter::sort(container& data) {
    // Nothing to merge or sort; also avoids indexing placeholder_[0] below.
    if (data.placeholder_.empty()) {
        return;
    }

    auto& merged = data.placeholder_[0];
    for (std::size_t i = 1; i < data.placeholder_.size(); ++i) {
        auto& part = data.placeholder_[i];
        // A single range insert grows the target at most once per part.
        merged.insert(merged.end(), part.begin(), part.end());
        part.clear();
    }

#if DEBUG
    for (std::size_t i = 0; i < merged.size(); ++i) {
        // if (copy[i] != data.placeholder_[0][i])
        std::cerr << i << " before:" << merged[i] << std::endl;
    }
    std::vector<container::element_type> copy(merged.begin(), merged.end());
    std::sort(copy.begin(), copy.end());
#endif

    sorter::msd_inplace_radix_sort(
        merged,
        0,
        [&](std::span<container::element_type> bucket) { sorter::robin_hood_sort(bucket); });

#if DEBUG
    for (std::size_t i = 0; i < copy.size(); ++i) {
        if (copy[i] != merged[i]) {
            std::cerr << i << " " << "sorted: " << copy[i] << " actual:" << merged[i] << std::endl;
        }
    }
#endif
}

/// In-place MSD radix sort that consumes one bit per recursion level.
///
/// Elements are partitioned by bit `KEY_BITS - passes - 1` (clear bit first,
/// set bit second) and both halves are sorted recursively. Once `passes`
/// reaches `sorter::RADIX_ITERATIONS` the remaining range is handed to
/// `bucket_sort` (ranges of two elements are ordered directly).
///
/// @param range       elements to sort in place.
/// @param passes      number of leading bits already consumed by callers.
/// @param bucket_sort callback that sorts a range once the bit budget is used.
void sorter::msd_inplace_radix_sort_binary(
    std::span<container::element_type> range,
    size_t passes,
    const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
) {
    // Zero or one element is already sorted.
    if (range.size() < 2) {
        return;
    }

    // Leaf: the bit budget is exhausted (or no bit is left to inspect at all,
    // which also keeps the shift below well-defined).
    if (passes >= static_cast<std::size_t>(sorter::RADIX_ITERATIONS) || passes >= KEY_BITS) {
        if (range.size() == 2) {
            // Compare as unsigned keys to agree with the bitwise partitioning.
            if (to_key(range[0]) > to_key(range[1])) {
                std::swap(range[0], range[1]);
            }
        } else {
            bucket_sort(range);
        }
        return;
    }

    // Build the probe bit in the key's own width; `1L` is only 32 bits wide on
    // LLP64 platforms.
    const auto probe_bit = static_cast<ukey_t>(ukey_t{1} << (KEY_BITS - passes - 1));
    const auto middle = std::partition(
        range.begin(), range.end(),
        [probe_bit](elem_t value) { return (to_key(value) & probe_bit) == 0; });
    const auto zero_count = static_cast<std::size_t>(middle - range.begin());

#if DEBUG
    std::cerr << "pass: " << passes
              << " begin: " << range.data()
              << " end: " << range.data() + range.size()
              << " lower: " << range.data() + zero_count << std::endl;
#endif

    sorter::msd_inplace_radix_sort_binary(range.first(zero_count), passes + 1, bucket_sort);
    sorter::msd_inplace_radix_sort_binary(range.subspan(zero_count), passes + 1, bucket_sort);
}

/// In-place MSD radix sort that consumes `sorter::RADIX_SIZE` bits per level.
///
/// Ranges of at most `sorter::SMALL_SORT_THRESHHOLD` elements are delegated to
/// `bucket_sort`. Larger ranges are counted into `sorter::RADIX_BUCKETS`
/// buckets by their current digit, permuted in place so that each bucket is
/// contiguous, and then every bucket is sorted recursively with the next digit.
/// When no complete digit is left, the range is finished with `std::sort`
/// (unsigned key order), which bounds the recursion depth.
///
/// NOTE(review): assumes `RADIX_BUCKETS >= 2^RADIX_SIZE`; this is asserted
/// while counting. Confirm against the declarations in sorter.hpp.
///
/// @param range       elements to sort in place.
/// @param passes      number of digits already consumed by callers.
/// @param bucket_sort callback used for small ranges.
void sorter::msd_inplace_radix_sort(
    std::span<container::element_type> range,
    size_t passes,
    const std::function<void(std::span<container::element_type> bucket)>& bucket_sort
) {
    if (range.empty()) {
        return;
    }

    if (range.size() <= sorter::SMALL_SORT_THRESHHOLD) {
        bucket_sort(range);
        return;
    }

    const auto digit_bits = static_cast<std::size_t>(sorter::RADIX_SIZE);

    // No complete digit left (digit_bits * (passes + 1) > KEY_BITS): recursing
    // further would shift by an invalid amount and, for ranges full of equal
    // keys, never terminate. Finish with a comparison sort instead.
    if (digit_bits == 0 || passes >= KEY_BITS / digit_bits) {
        std::sort(range.begin(), range.end(),
                  [](elem_t lhs, elem_t rhs) { return to_key(lhs) < to_key(rhs); });
        return;
    }

    // Position of the current digit: the top `digit_bits * passes` bits were
    // consumed by the callers, the next `digit_bits` bits select the bucket.
    const std::size_t shift = KEY_BITS - digit_bits * (passes + 1);
    const ukey_t digit_mask = digit_bits >= KEY_BITS
        ? std::numeric_limits<ukey_t>::max()
        : static_cast<ukey_t>((ukey_t{1} << digit_bits) - 1);
    const auto digit_of = [shift, digit_mask](elem_t value) -> std::size_t {
        return static_cast<std::size_t>((to_key(value) >> shift) & digit_mask);
    };

    // First determine the number of elements per bucket. This is one additional
    // pass over the elements and needs O(buckets) extra space, i.e. constant
    // overhead for a fixed configuration.
    std::size_t bucket_sizes[sorter::RADIX_BUCKETS] = {};
    constexpr std::size_t bucket_count = std::size(bucket_sizes);
    for (const elem_t value : range) {
        const std::size_t digit = digit_of(value);
        assert(digit < bucket_count);
        ++bucket_sizes[digit];
    }

#if DEBUG
    std::cerr << "Bucket sizes: ";
    for (auto bucket : bucket_sizes) {
        std::cerr << bucket << " ";
    }
    std::cerr << std::endl;
#endif

    // Point each bucket at its start location in the range. `buckets_end[i]`
    // is the next free slot of bucket i and grows as elements are placed.
    container::element_type* buckets_end[sorter::RADIX_BUCKETS];
    container::element_type* buckets_start[sorter::RADIX_BUCKETS];
    elem_t* const first = range.data();
    elem_t* const last = first + range.size();

#if DEBUG
    std::cerr << "Starting bucket" << std::endl;
#endif
    std::size_t offset = 0;
    for (std::size_t i = 0; i < bucket_count; ++i) {
        // Pointer arithmetic (not range[offset]) because trailing empty buckets
        // legitimately start one past the last element.
        buckets_start[i] = first + offset;
        buckets_end[i] = first + offset;
#if DEBUG
        std::cerr << "bucket " << i << " at " << offset << std::endl;
#endif
        offset += bucket_sizes[i];
    }
#if DEBUG
    std::cerr << "finish" << std::endl;
#endif

    // Walk over the elements and swap them into their buckets.
    elem_t* cursor = first;
    while (cursor < last) {
        const std::size_t digit = digit_of(*cursor);
        if (cursor >= buckets_start[digit] && cursor < buckets_end[digit]) {
            // Already inside the filled part of its own bucket: skip to the
            // first slot of that bucket that has not been filled yet.
            cursor = buckets_end[digit];
        } else {
            // Misplaced: move it to the next free slot of its bucket and
            // re-examine whatever was swapped into the cursor position.
            std::swap(*cursor, *buckets_end[digit]);
            ++buckets_end[digit];
        }
    }

#if DEBUG
    for (std::size_t i = 0; i < range.size(); ++i) {
        std::cerr << i << " reordered:" << range[i] << std::endl;
    }
    std::cerr << "Finish reordering elements" << std::endl;
    std::cerr << "Bucket elements at begin of bucket" << std::endl;
    for (auto* bucket : buckets_start) {
        // Trailing empty buckets start at the end pointer; do not read it.
        if (bucket == last) {
            continue;
        }
        std::cerr << *bucket << " bucket " << digit_of(*bucket) << std::endl;
    }
    std::cerr << std::endl;
#endif

    // Every bucket must now end exactly where the next one starts.
    for (std::size_t i = 0; i + 1 < bucket_count; ++i) {
        assert(buckets_end[i] == buckets_start[i + 1]);
    }
    assert(buckets_end[bucket_count - 1] == last);
#if DEBUG
    std::cerr << "Ranges of buckets are correct" << std::endl;
#endif

    // Sort each bucket recursively; buckets with fewer than two elements are
    // already sorted.
    for (std::size_t i = 0; i < bucket_count; ++i) {
        const std::span<container::element_type> child(buckets_start[i], buckets_end[i]);
        if (child.size() > 1) {
            sorter::msd_inplace_radix_sort(child, passes + 1, bucket_sort);
        }
    }
}

/// Sorts `bucket` (ascending by unsigned key) by scattering its elements into
/// a scratch table, then repairing the order with an insertion sort and
/// copying the smallest `bucket.size()` entries back.
///
/// The scratch table has `bucket.size() + sorter::OVERHEAD_SIZE` slots. Each
/// element's home slot is derived from its low `KEY_BITS - RADIX_ITERATIONS`
/// bits; collisions are resolved by linear probing that wraps around, so an
/// element is never dropped. Free slots hold the all-ones bit pattern, which
/// is the largest unsigned key and therefore ends up behind every real element
/// after the insertion sort. An element that itself equals this pattern is
/// indistinguishable from a free slot, but the copy-back still yields the
/// right multiset because the trailing free slots carry the same value.
///
/// @param bucket elements to sort in place.
void sorter::robin_hood_sort(std::span<container::element_type> bucket) {
    const std::size_t count = bucket.size();
    if (count < 2) {
        return;
    }

    const std::size_t size = count + static_cast<std::size_t>(sorter::OVERHEAD_SIZE);
    const auto prefix_bits = static_cast<std::size_t>(sorter::RADIX_ITERATIONS);

    // Mask selecting the bits below the radix prefix. The edge cases keep the
    // shift within the key width.
    ukey_t mask = 0;
    if (prefix_bits == 0) {
        mask = std::numeric_limits<ukey_t>::max();
    } else if (prefix_bits < KEY_BITS) {
        mask = static_cast<ukey_t>((ukey_t{1} << (KEY_BITS - prefix_bits)) - 1);
    }

    // Number of distinct masked keys that share one home slot. Dividing by it
    // maps [0, mask] monotonically onto [0, count) without the overflow that
    // `masked * count / mask` can hit, and without dividing by zero.
    const auto keys_per_slot = static_cast<ukey_t>(mask / count + 1);

    const elem_t empty_slot = static_cast<elem_t>(std::numeric_limits<ukey_t>::max());
    std::vector<container::element_type> space(size, empty_slot);

    for (const elem_t element : bucket) {
        const auto index = static_cast<std::size_t>((to_key(element) & mask) / keys_per_slot);
        if (space[index] == empty_slot) {
            space[index] = element;
        } else {
#if DEBUG
            std::cerr << "Linear probing of " << element << " at index " << index
                      << ". Current element " << space[index] << std::endl;
#endif
            // Linear probing with wrap-around. A free slot always exists
            // because fewer than `size` elements have been stored so far.
            std::size_t i = index;
            while (space[i] != empty_slot) {
                i = (i + 1 == size) ? 0 : i + 1;
            }
#if DEBUG
            std::cerr << "Inserting " << element << " at index " << i
                      << " instead of " << index << std::endl;
#endif
            space[i] = element;
        }
    }

#if DEBUG
    std::cerr << "Unsorted\n";
    for (auto element : space) {
        std::cerr << element << " ";
    }
    std::cerr << std::endl;
#endif

    // One final insertion-sort pass to correct the order disturbed by probing.
    // `j > 0` is tested first so that space[j - 1] is never read out of bounds.
    for (std::size_t i = 1; i < size; ++i) {
        for (std::size_t j = i; j > 0 && to_key(space[j - 1]) > to_key(space[j]); --j) {
            std::swap(space[j], space[j - 1]);
        }
    }

#if DEBUG
    std::cerr << "Original\n";
    for (auto element : bucket) {
        std::cerr << element << " ";
    }
    std::cerr << std::endl;
    std::cerr << "Checking if sorted\n";
    for (auto element : space) {
        std::cerr << element << " ";
    }
    std::cerr << std::endl;
#endif

    // Copy the sorted elements back into the original range; the free slots
    // sorted to the tail and are left behind.
    std::copy_n(space.begin(), count, bucket.begin());
}

}  // namespace ae