| 1 | // Contains get_min_count, the core optimization of the spreadsort algorithm. |
| 2 | // Also has other helper functions commonly useful across variants. |
| 3 | |
| 4 | // Copyright Steven J. Ross 2001 - 2014. |
| 5 | // Distributed under the Boost Software License, Version 1.0. |
| 6 | // (See accompanying file LICENSE_1_0.txt or copy at |
| 7 | // http://www.boost.org/LICENSE_1_0.txt) |
| 8 | |
| 9 | // See http://www.boost.org/libs/sort for library home page. |
| 10 | |
| 11 | /* |
| 12 | Some improvements suggested by: |
| 13 | Phil Endecott and Frank Gennari |
| 14 | */ |
| 15 | |
| 16 | #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP |
| 17 | #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP |
| 18 | #include <algorithm> |
| 19 | #include <vector> |
| 20 | #include <cstring> |
| 21 | #include <limits> |
| 22 | #include <functional> |
| 23 | #include <boost/static_assert.hpp> |
| 24 | #include <boost/sort/pdqsort/pdqsort.hpp> |
| 25 | #include <boost/sort/spreadsort/detail/constants.hpp> |
| 26 | #include <boost/cstdint.hpp> |
| 27 | |
| 28 | namespace boost { |
| 29 | namespace sort { |
| 30 | namespace spreadsort { |
| 31 | namespace detail { |
| 32 | //This only works on unsigned data types |
| 33 | template <typename T> |
| 34 | inline unsigned |
| 35 | rough_log_2_size(const T& input) |
| 36 | { |
| 37 | unsigned result = 0; |
| 38 | //The && is necessary on some compilers to avoid infinite loops |
| 39 | //it doesn't significantly impair performance |
| 40 | while ((result < (8*sizeof(T))) && (input >> result)) ++result; |
| 41 | return result; |
| 42 | } |
| 43 | |
| 44 | //Gets the minimum size to call spreadsort on to control worst-case runtime. |
| 45 | //This is called for a set of bins, instead of bin-by-bin, to minimize |
| 46 | //runtime overhead. |
| 47 | //This could be replaced by a lookup table of sizeof(Div_type)*8 but this |
| 48 | //function is more general. |
| 49 | template<unsigned log_mean_bin_size, |
| 50 | unsigned log_min_split_count, unsigned log_finishing_count> |
| 51 | inline size_t |
| 52 | get_min_count(unsigned log_range) |
| 53 | { |
| 54 | const size_t typed_one = 1; |
| 55 | const unsigned min_size = log_mean_bin_size + log_min_split_count; |
| 56 | //Assuring that constants have valid settings |
| 57 | BOOST_STATIC_ASSERT(log_min_split_count <= max_splits && |
| 58 | log_min_split_count > 0); |
| 59 | BOOST_STATIC_ASSERT(max_splits > 1 && |
| 60 | max_splits < (8 * sizeof(unsigned))); |
| 61 | BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits && |
| 62 | max_finishing_splits < (8 * sizeof(unsigned))); |
| 63 | BOOST_STATIC_ASSERT(log_mean_bin_size >= 0); |
| 64 | BOOST_STATIC_ASSERT(log_finishing_count >= 0); |
| 65 | //if we can complete in one iteration, do so |
| 66 | //This first check allows the compiler to optimize never-executed code out |
| 67 | if (log_finishing_count < min_size) { |
| 68 | if (log_range <= min_size && log_range <= max_splits) { |
| 69 | //Return no smaller than a certain minimum limit |
| 70 | if (log_range <= log_finishing_count) |
| 71 | return typed_one << log_finishing_count; |
| 72 | return typed_one << log_range; |
| 73 | } |
| 74 | } |
| 75 | const unsigned base_iterations = max_splits - log_min_split_count; |
| 76 | //sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size |
| 77 | const unsigned base_range = |
| 78 | ((base_iterations + 1) * (max_splits + log_min_split_count))/2 |
| 79 | + log_mean_bin_size; |
| 80 | //Calculating the required number of iterations, and returning |
| 81 | //1 << (iteration_count + min_size) |
| 82 | if (log_range < base_range) { |
| 83 | unsigned result = log_min_split_count; |
| 84 | for (unsigned offset = min_size; offset < log_range; |
| 85 | offset += ++result); |
| 86 | //Preventing overflow; this situation shouldn't occur |
| 87 | if ((result + log_mean_bin_size) >= (8 * sizeof(size_t))) |
| 88 | return typed_one << ((8 * sizeof(size_t)) - 1); |
| 89 | return typed_one << (result + log_mean_bin_size); |
| 90 | } |
| 91 | //A quick division can calculate the worst-case runtime for larger ranges |
| 92 | unsigned remainder = log_range - base_range; |
| 93 | //the max_splits - 1 is used to calculate the ceiling of the division |
| 94 | unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits) |
| 95 | + base_iterations + min_size); |
| 96 | //Preventing overflow; this situation shouldn't occur |
| 97 | if (bit_length >= (8 * sizeof(size_t))) |
| 98 | return typed_one << ((8 * sizeof(size_t)) - 1); |
| 99 | //n(log_range)/max_splits + C, optimizing worst-case performance |
| 100 | return typed_one << bit_length; |
| 101 | } |
| 102 | |
| 103 | // Resizes the bin cache and bin sizes, and initializes each bin size to 0. |
| 104 | // This generates the memory overhead to use in radix sorting. |
| 105 | template <class RandomAccessIter> |
| 106 | inline RandomAccessIter * |
| 107 | size_bins(size_t *bin_sizes, std::vector<RandomAccessIter> |
| 108 | &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count) |
| 109 | { |
| 110 | // Clear the bin sizes |
| 111 | for (size_t u = 0; u < bin_count; u++) |
| 112 | bin_sizes[u] = 0; |
| 113 | //Make sure there is space for the bins |
| 114 | cache_end = cache_offset + bin_count; |
| 115 | if (cache_end > bin_cache.size()) |
| 116 | bin_cache.resize(cache_end); |
| 117 | return &(bin_cache[cache_offset]); |
| 118 | } |
| 119 | } |
| 120 | } |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | #endif |
| 125 | |