stabilize build system: depends, installer, boost/bdb fixes, cross targets groundwork
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
// The Boost Sort library cumulative header.
|
||||
|
||||
// Copyright Steven J. Ross 2014
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort/ for library home page.
|
||||
|
||||
#ifndef BOOST_SORT_HPP
|
||||
#define BOOST_SORT_HPP
|
||||
|
||||
/*
|
||||
Cumulative include for the Boost Sort library
|
||||
*/
|
||||
|
||||
#include <boost/sort/spreadsort/spreadsort.hpp>
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,46 @@
|
||||
//constant definitions for the Boost Sort library
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort for library home page.
|
||||
#ifndef BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
|
||||
#define BOOST_SORT_SPREADSORT_DETAIL_CONSTANTS
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
namespace detail {
|
||||
//Tuning constants
|
||||
//This should be tuned to your processor cache;
|
||||
//if you go too large you get cache misses on bins
|
||||
//The smaller this number, the less worst-case memory usage.
|
||||
//If too small, too many recursions slow down spreadsort
|
||||
enum { max_splits = 11,
|
||||
//It's better to have a few cache misses and finish sorting
|
||||
//than to run another iteration
|
||||
max_finishing_splits = max_splits + 1,
|
||||
//Sets the minimum number of items per bin.
|
||||
int_log_mean_bin_size = 2,
|
||||
//Used to force a comparison-based sorting for small bins, if it's faster.
|
||||
//Minimum value 1
|
||||
int_log_min_split_count = 9,
|
||||
//This is the minimum split count to use spreadsort when it will finish in one
|
||||
//iteration. Make this larger the faster std::sort is relative to integer_sort.
|
||||
int_log_finishing_count = 31,
|
||||
//Sets the minimum number of items per bin for floating point.
|
||||
float_log_mean_bin_size = 2,
|
||||
//Used to force a comparison-based sorting for small bins, if it's faster.
|
||||
//Minimum value 1
|
||||
float_log_min_split_count = 8,
|
||||
//This is the minimum split count to use spreadsort when it will finish in one
|
||||
//iteration. Make this larger the faster std::sort is relative to float_sort.
|
||||
float_log_finishing_count = 4,
|
||||
//There is a minimum size below which it is not worth using spreadsort
|
||||
min_sort_size = 1000 };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,831 @@
|
||||
// Details for templated Spreadsort-based float_sort.
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
float_mem_cast fix provided by:
|
||||
Scott McMurray
|
||||
*/
|
||||
|
||||
#ifndef BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
|
||||
#define BOOST_SORT_SPREADSORT_DETAIL_FLOAT_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <functional>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/serialization/static_warning.hpp>
|
||||
#include <boost/utility/enable_if.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/integer_sort.hpp>
|
||||
#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
namespace detail {
|
||||
//Casts a RandomAccessIter to the specified integer type
|
||||
template<class Cast_type, class RandomAccessIter>
|
||||
inline Cast_type
|
||||
cast_float_iter(const RandomAccessIter & floatiter)
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIter>::value_type
|
||||
Data_type;
|
||||
//Only cast IEEE floating-point numbers, and only to same-sized integers
|
||||
BOOST_STATIC_ASSERT(sizeof(Cast_type) == sizeof(Data_type));
|
||||
BOOST_STATIC_ASSERT(std::numeric_limits<Data_type>::is_iec559);
|
||||
BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
|
||||
Cast_type result;
|
||||
std::memcpy(&result, &(*floatiter), sizeof(Data_type));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Return true if the list is sorted. Otherwise, find the minimum and
|
||||
// maximum. Values are Right_shifted 0 bits before comparison.
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline bool
|
||||
is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
|
||||
Div_type & max, Div_type & min, Right_shift rshift)
|
||||
{
|
||||
min = max = rshift(*current, 0);
|
||||
RandomAccessIter prev = current;
|
||||
bool sorted = true;
|
||||
while (++current < last) {
|
||||
Div_type value = rshift(*current, 0);
|
||||
sorted &= *current >= *prev;
|
||||
prev = current;
|
||||
if (max < value)
|
||||
max = value;
|
||||
else if (value < min)
|
||||
min = value;
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
|
||||
// Return true if the list is sorted. Otherwise, find the minimum and
|
||||
// maximum. Uses comp to check if the data is already sorted.
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
inline bool
|
||||
is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
|
||||
Div_type & max, Div_type & min,
|
||||
Right_shift rshift, Compare comp)
|
||||
{
|
||||
min = max = rshift(*current, 0);
|
||||
RandomAccessIter prev = current;
|
||||
bool sorted = true;
|
||||
while (++current < last) {
|
||||
Div_type value = rshift(*current, 0);
|
||||
sorted &= !comp(*current, *prev);
|
||||
prev = current;
|
||||
if (max < value)
|
||||
max = value;
|
||||
else if (value < min)
|
||||
min = value;
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
|
||||
//Specialized swap loops for floating-point casting
|
||||
template <class RandomAccessIter, class Div_type>
|
||||
inline void inner_float_swap_loop(RandomAccessIter * bins,
|
||||
const RandomAccessIter & nextbinstart, unsigned ii
|
||||
, const unsigned log_divisor, const Div_type div_min)
|
||||
{
|
||||
RandomAccessIter * local_bin = bins + ii;
|
||||
for (RandomAccessIter current = *local_bin; current < nextbinstart;
|
||||
++current) {
|
||||
for (RandomAccessIter * target_bin =
|
||||
(bins + ((cast_float_iter<Div_type, RandomAccessIter>(current) >>
|
||||
log_divisor) - div_min)); target_bin != local_bin;
|
||||
target_bin = bins + ((cast_float_iter<Div_type, RandomAccessIter>
|
||||
(current) >> log_divisor) - div_min)) {
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type tmp;
|
||||
RandomAccessIter b = (*target_bin)++;
|
||||
RandomAccessIter * b_bin = bins + ((cast_float_iter<Div_type,
|
||||
RandomAccessIter>(b) >> log_divisor) - div_min);
|
||||
//Three-way swap; if the item to be swapped doesn't belong in the
|
||||
//current bin, swap it to where it belongs
|
||||
if (b_bin != local_bin) {
|
||||
RandomAccessIter c = (*b_bin)++;
|
||||
tmp = *c;
|
||||
*c = *b;
|
||||
}
|
||||
else
|
||||
tmp = *b;
|
||||
*b = *current;
|
||||
*current = tmp;
|
||||
}
|
||||
}
|
||||
*local_bin = nextbinstart;
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type>
|
||||
inline void float_swap_loop(RandomAccessIter * bins,
|
||||
RandomAccessIter & nextbinstart, unsigned ii,
|
||||
const size_t *bin_sizes,
|
||||
const unsigned log_divisor, const Div_type div_min)
|
||||
{
|
||||
nextbinstart += bin_sizes[ii];
|
||||
inner_float_swap_loop<RandomAccessIter, Div_type>
|
||||
(bins, nextbinstart, ii, log_divisor, div_min);
|
||||
}
|
||||
|
||||
// Return true if the list is sorted. Otherwise, find the minimum and
|
||||
// maximum. Values are cast to Cast_type before comparison.
|
||||
template <class RandomAccessIter, class Cast_type>
|
||||
inline bool
|
||||
is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
|
||||
Cast_type & max, Cast_type & min)
|
||||
{
|
||||
min = max = cast_float_iter<Cast_type, RandomAccessIter>(current);
|
||||
RandomAccessIter prev = current;
|
||||
bool sorted = true;
|
||||
while (++current < last) {
|
||||
Cast_type value = cast_float_iter<Cast_type, RandomAccessIter>(current);
|
||||
sorted &= *current >= *prev;
|
||||
prev = current;
|
||||
if (max < value)
|
||||
max = value;
|
||||
else if (value < min)
|
||||
min = value;
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
|
||||
//Special-case sorting of positive floats with casting
|
||||
template <class RandomAccessIter, class Div_type, class Size_type>
|
||||
inline void
|
||||
positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last,
|
||||
max, min))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
|
||||
current++) >> log_divisor) - div_min)]++;
|
||||
bins[0] = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; u++)
|
||||
bins[u + 1] = bins[u] + bin_sizes[u];
|
||||
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; ++u)
|
||||
float_swap_loop<RandomAccessIter, Div_type>
|
||||
(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
|
||||
bins[bin_count - 1] = last;
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
|
||||
++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u]);
|
||||
else
|
||||
positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
|
||||
(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//Sorting negative floats
|
||||
//Bins are iterated in reverse because max_neg_float = min_neg_int
|
||||
template <class RandomAccessIter, class Div_type, class Size_type>
|
||||
inline void
|
||||
negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset, size_t *bin_sizes)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last,
|
||||
max, min))
|
||||
return;
|
||||
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
|
||||
current++) >> log_divisor) - div_min)]++;
|
||||
bins[bin_count - 1] = first;
|
||||
for (int ii = bin_count - 2; ii >= 0; --ii)
|
||||
bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
//The last bin will always have the correct elements in it
|
||||
for (int ii = bin_count - 1; ii > 0; --ii)
|
||||
float_swap_loop<RandomAccessIter, Div_type>
|
||||
(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
|
||||
//Update the end position because we don't process the last bin
|
||||
bin_cache[cache_offset] = last;
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii], --ii) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii]);
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
|
||||
(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//Sorting negative floats
|
||||
//Bins are iterated in reverse order because max_neg_float = min_neg_int
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Size_type>
|
||||
inline void
|
||||
negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes, Right_shift rshift)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min, rshift))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
bins[bin_count - 1] = first;
|
||||
for (int ii = bin_count - 2; ii >= 0; --ii)
|
||||
bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
//The last bin will always have the correct elements in it
|
||||
for (int ii = bin_count - 1; ii > 0; --ii)
|
||||
swap_loop<RandomAccessIter, Div_type, Right_shift>
|
||||
(bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
|
||||
//Update the end position of the unprocessed last bin
|
||||
bin_cache[cache_offset] = last;
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii], --ii) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii]);
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
|
||||
Size_type>
|
||||
(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, rshift);
|
||||
}
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare, class Size_type>
|
||||
inline void
|
||||
negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
|
||||
size_t *bin_sizes, Right_shift rshift, Compare comp)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
bins[bin_count - 1] = first;
|
||||
for (int ii = bin_count - 2; ii >= 0; --ii)
|
||||
bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
//The last bin will always have the correct elements in it
|
||||
for (int ii = bin_count - 1; ii > 0; --ii)
|
||||
swap_loop<RandomAccessIter, Div_type, Right_shift>
|
||||
(bins, nextbinstart, ii, rshift, bin_sizes, log_divisor, div_min);
|
||||
//Update the end position of the unprocessed last bin
|
||||
bin_cache[cache_offset] = last;
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_end - 1; ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii], --ii) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii], comp);
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
|
||||
Compare, Size_type>(lastPos, bin_cache[ii],
|
||||
bin_cache, cache_end,
|
||||
bin_sizes, rshift, comp);
|
||||
}
|
||||
}
|
||||
|
||||
//Casting special-case for floating-point sorting
|
||||
template <class RandomAccessIter, class Div_type, class Size_type>
|
||||
inline void
|
||||
float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes<RandomAccessIter, Div_type>(first, last,
|
||||
max, min))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned((cast_float_iter<Div_type, RandomAccessIter>(
|
||||
current++) >> log_divisor) - div_min)]++;
|
||||
//The index of the first positive bin
|
||||
//Must be divided small enough to fit into an integer
|
||||
unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
|
||||
//Resetting if all bins are negative
|
||||
if (cache_offset + first_positive > cache_end)
|
||||
first_positive = cache_end - cache_offset;
|
||||
//Reversing the order of the negative bins
|
||||
//Note that because of the negative/positive ordering direction flip
|
||||
//We can not depend upon bin order and positions matching up
|
||||
//so bin_sizes must be reused to contain the end of the bin
|
||||
if (first_positive > 0) {
|
||||
bins[first_positive - 1] = first;
|
||||
for (int ii = first_positive - 2; ii >= 0; --ii) {
|
||||
bins[ii] = first + bin_sizes[ii + 1];
|
||||
bin_sizes[ii] += bin_sizes[ii + 1];
|
||||
}
|
||||
//Handling positives following negatives
|
||||
if (first_positive < bin_count) {
|
||||
bins[first_positive] = first + bin_sizes[0];
|
||||
bin_sizes[first_positive] += bin_sizes[0];
|
||||
}
|
||||
}
|
||||
else
|
||||
bins[0] = first;
|
||||
for (unsigned u = first_positive; u < bin_count - 1; u++) {
|
||||
bins[u + 1] = first + bin_sizes[u];
|
||||
bin_sizes[u + 1] += bin_sizes[u];
|
||||
}
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
for (unsigned u = 0; u < bin_count; ++u) {
|
||||
nextbinstart = first + bin_sizes[u];
|
||||
inner_float_swap_loop<RandomAccessIter, Div_type>
|
||||
(bins, nextbinstart, u, log_divisor, div_min);
|
||||
}
|
||||
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Handling negative values first
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_offset + first_positive - 1;
|
||||
ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii--]) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii]);
|
||||
//sort negative values using reversed-bin spreadsort
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type, Size_type>
|
||||
(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
|
||||
for (unsigned u = cache_offset + first_positive; u < cache_end;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u]);
|
||||
//sort positive values using normal spreadsort
|
||||
else
|
||||
positive_float_sort_rec<RandomAccessIter, Div_type, Size_type>
|
||||
(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//Functor implementation for recursive sorting
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift
|
||||
, class Size_type>
|
||||
inline void
|
||||
float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes, Right_shift rshift)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min, rshift))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
//The index of the first positive bin
|
||||
unsigned first_positive = (div_min < 0) ? unsigned(-div_min) : 0;
|
||||
//Resetting if all bins are negative
|
||||
if (cache_offset + first_positive > cache_end)
|
||||
first_positive = cache_end - cache_offset;
|
||||
//Reversing the order of the negative bins
|
||||
//Note that because of the negative/positive ordering direction flip
|
||||
//We can not depend upon bin order and positions matching up
|
||||
//so bin_sizes must be reused to contain the end of the bin
|
||||
if (first_positive > 0) {
|
||||
bins[first_positive - 1] = first;
|
||||
for (int ii = first_positive - 2; ii >= 0; --ii) {
|
||||
bins[ii] = first + bin_sizes[ii + 1];
|
||||
bin_sizes[ii] += bin_sizes[ii + 1];
|
||||
}
|
||||
//Handling positives following negatives
|
||||
if (static_cast<unsigned>(first_positive) < bin_count) {
|
||||
bins[first_positive] = first + bin_sizes[0];
|
||||
bin_sizes[first_positive] += bin_sizes[0];
|
||||
}
|
||||
}
|
||||
else
|
||||
bins[0] = first;
|
||||
for (unsigned u = first_positive; u < bin_count - 1; u++) {
|
||||
bins[u + 1] = first + bin_sizes[u];
|
||||
bin_sizes[u + 1] += bin_sizes[u];
|
||||
}
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
for (unsigned u = 0; u < bin_count; ++u) {
|
||||
next_bin_start = first + bin_sizes[u];
|
||||
inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
|
||||
(bins, next_bin_start, u, rshift, log_divisor, div_min);
|
||||
}
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Handling negative values first
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_offset + first_positive - 1;
|
||||
ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii--]) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii]);
|
||||
//sort negative values using reversed-bin spreadsort
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type,
|
||||
Right_shift, Size_type>(lastPos, bin_cache[ii], bin_cache,
|
||||
cache_end, bin_sizes, rshift);
|
||||
}
|
||||
|
||||
for (unsigned u = cache_offset + first_positive; u < cache_end;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u]);
|
||||
//sort positive values using normal spreadsort
|
||||
else
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
|
||||
float_log_mean_bin_size, float_log_min_split_count,
|
||||
float_log_finishing_count>
|
||||
(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
|
||||
}
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare, class Size_type>
|
||||
inline void
|
||||
float_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset,
|
||||
size_t *bin_sizes, Right_shift rshift, Compare comp)
|
||||
{
|
||||
Div_type max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min, rshift, comp))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<float_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type(max - min)));
|
||||
Div_type div_min = min >> log_divisor;
|
||||
Div_type div_max = max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
//The index of the first positive bin
|
||||
unsigned first_positive =
|
||||
(div_min < 0) ? static_cast<unsigned>(-div_min) : 0;
|
||||
//Resetting if all bins are negative
|
||||
if (cache_offset + first_positive > cache_end)
|
||||
first_positive = cache_end - cache_offset;
|
||||
//Reversing the order of the negative bins
|
||||
//Note that because of the negative/positive ordering direction flip
|
||||
//We can not depend upon bin order and positions matching up
|
||||
//so bin_sizes must be reused to contain the end of the bin
|
||||
if (first_positive > 0) {
|
||||
bins[first_positive - 1] = first;
|
||||
for (int ii = first_positive - 2; ii >= 0; --ii) {
|
||||
bins[ii] = first + bin_sizes[ii + 1];
|
||||
bin_sizes[ii] += bin_sizes[ii + 1];
|
||||
}
|
||||
//Handling positives following negatives
|
||||
if (static_cast<unsigned>(first_positive) < bin_count) {
|
||||
bins[first_positive] = first + bin_sizes[0];
|
||||
bin_sizes[first_positive] += bin_sizes[0];
|
||||
}
|
||||
}
|
||||
else
|
||||
bins[0] = first;
|
||||
for (unsigned u = first_positive; u < bin_count - 1; u++) {
|
||||
bins[u + 1] = first + bin_sizes[u];
|
||||
bin_sizes[u + 1] += bin_sizes[u];
|
||||
}
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
for (unsigned u = 0; u < bin_count; ++u) {
|
||||
next_bin_start = first + bin_sizes[u];
|
||||
inner_swap_loop<RandomAccessIter, Div_type, Right_shift>
|
||||
(bins, next_bin_start, u, rshift, log_divisor, div_min);
|
||||
}
|
||||
|
||||
//Return if we've completed bucketsorting
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Handling negative values first
|
||||
size_t max_count = get_min_count<float_log_mean_bin_size,
|
||||
float_log_min_split_count,
|
||||
float_log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (int ii = cache_offset + first_positive - 1;
|
||||
ii >= static_cast<int>(cache_offset);
|
||||
lastPos = bin_cache[ii--]) {
|
||||
size_t count = bin_cache[ii] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[ii], comp);
|
||||
//sort negative values using reversed-bin spreadsort
|
||||
else
|
||||
negative_float_sort_rec<RandomAccessIter, Div_type, Right_shift,
|
||||
Compare, Size_type>(lastPos, bin_cache[ii],
|
||||
bin_cache, cache_end,
|
||||
bin_sizes, rshift, comp);
|
||||
}
|
||||
|
||||
for (unsigned u = cache_offset + first_positive; u < cache_end;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u], comp);
|
||||
//sort positive values using normal spreadsort
|
||||
else
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
Size_type, float_log_mean_bin_size,
|
||||
float_log_min_split_count, float_log_finishing_count>
|
||||
(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
|
||||
}
|
||||
}
|
||||
|
||||
//Checking whether the value type is a float, and trying a 32-bit integer
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c< sizeof(boost::uint32_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
|
||||
&& std::numeric_limits<typename
|
||||
std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
|
||||
void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, boost::int32_t, boost::uint32_t>
|
||||
(first, last, bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
//Checking whether the value type is a double, and using a 64-bit integer
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c< sizeof(boost::uint64_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
|
||||
&& std::numeric_limits<typename
|
||||
std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
|
||||
void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, boost::int64_t, boost::uint64_t>
|
||||
(first, last, bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::disable_if_c< (sizeof(boost::uint64_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
|
||||
|| sizeof(boost::uint32_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
|
||||
&& std::numeric_limits<typename
|
||||
std::iterator_traits<RandomAccessIter>::value_type>::is_iec559,
|
||||
void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
BOOST_STATIC_WARNING(!(sizeof(boost::uint64_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type)
|
||||
|| sizeof(boost::uint32_t) ==
|
||||
sizeof(typename std::iterator_traits<RandomAccessIter>::value_type))
|
||||
|| !std::numeric_limits<typename
|
||||
std::iterator_traits<RandomAccessIter>::value_type>::is_iec559);
|
||||
std::sort(first, last);
|
||||
}
|
||||
|
||||
//These approaches require the user to do the typecast
|
||||
//with rshift but default comparision
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
|
||||
void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, Div_type, Right_shift, size_t>
|
||||
(first, last, bin_cache, 0, bin_sizes, rshift);
|
||||
}
|
||||
|
||||
//maximum integer size with rshift but default comparision
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
|
||||
&& sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, Div_type, Right_shift, boost::uintmax_t>
|
||||
(first, last, bin_cache, 0, bin_sizes, rshift);
|
||||
}
|
||||
|
||||
//sizeof(Div_type) doesn't match, so use std::sort
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
|
||||
sizeof(Div_type), void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift)
|
||||
{
|
||||
BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type));
|
||||
std::sort(first, last);
|
||||
}
|
||||
|
||||
//specialized comparison
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
inline typename boost::enable_if_c< sizeof(size_t) >= sizeof(Div_type),
|
||||
void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift, Compare comp)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
size_t>
|
||||
(first, last, bin_cache, 0, bin_sizes, rshift, comp);
|
||||
}
|
||||
|
||||
//max-sized integer with specialized comparison
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
inline typename boost::enable_if_c< sizeof(size_t) < sizeof(Div_type)
|
||||
&& sizeof(boost::uintmax_t) >= sizeof(Div_type), void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift, Compare comp)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
float_sort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
boost::uintmax_t>
|
||||
(first, last, bin_cache, 0, bin_sizes, rshift, comp);
|
||||
}
|
||||
|
||||
//sizeof(Div_type) doesn't match, so use std::sort
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
inline typename boost::disable_if_c< sizeof(boost::uintmax_t) >=
|
||||
sizeof(Div_type), void >::type
|
||||
float_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift rshift, Compare comp)
|
||||
{
|
||||
BOOST_STATIC_WARNING(sizeof(boost::uintmax_t) >= sizeof(Div_type));
|
||||
std::sort(first, last, comp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,494 @@
|
||||
// Details for templated Spreadsort-based integer_sort.
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
*/
|
||||
|
||||
#ifndef BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
|
||||
#define BOOST_SORT_SPREADSORT_DETAIL_INTEGER_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <functional>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/serialization/static_warning.hpp>
|
||||
#include <boost/utility/enable_if.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
namespace detail {
|
||||
// Return true if the list is sorted. Otherwise, find the minimum and
|
||||
// maximum using <.
|
||||
template <class RandomAccessIter>
|
||||
inline bool
|
||||
is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
|
||||
RandomAccessIter & max, RandomAccessIter & min)
|
||||
{
|
||||
min = max = current;
|
||||
//This assumes we have more than 1 element based on prior checks.
|
||||
while (!(*(current + 1) < *current)) {
|
||||
//If everything is in sorted order, return
|
||||
if (++current == last - 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
//The maximum is the last sorted element
|
||||
max = current;
|
||||
//Start from the first unsorted element
|
||||
while (++current < last) {
|
||||
if (*max < *current)
|
||||
max = current;
|
||||
else if (*current < *min)
|
||||
min = current;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return true if the list is sorted. Otherwise, find the minimum and
|
||||
// maximum.
|
||||
// Use a user-defined comparison operator
|
||||
template <class RandomAccessIter, class Compare>
|
||||
inline bool
|
||||
is_sorted_or_find_extremes(RandomAccessIter current, RandomAccessIter last,
|
||||
RandomAccessIter & max, RandomAccessIter & min, Compare comp)
|
||||
{
|
||||
min = max = current;
|
||||
while (!comp(*(current + 1), *current)) {
|
||||
//If everything is in sorted order, return
|
||||
if (++current == last - 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
//The maximum is the last sorted element
|
||||
max = current;
|
||||
while (++current < last) {
|
||||
if (comp(*max, *current))
|
||||
max = current;
|
||||
else if (comp(*current, *min))
|
||||
min = current;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//Gets a non-negative right bit shift to operate as a logarithmic divisor
|
||||
template<unsigned log_mean_bin_size>
|
||||
inline int
|
||||
get_log_divisor(size_t count, int log_range)
|
||||
{
|
||||
int log_divisor;
|
||||
//If we can finish in one iteration without exceeding either
|
||||
//(2 to the max_finishing_splits) or n bins, do so
|
||||
if ((log_divisor = log_range - rough_log_2_size(count)) <= 0 &&
|
||||
log_range <= max_finishing_splits)
|
||||
log_divisor = 0;
|
||||
else {
|
||||
//otherwise divide the data into an optimized number of pieces
|
||||
log_divisor += log_mean_bin_size;
|
||||
//Cannot exceed max_splits or cache misses slow down bin lookups
|
||||
if ((log_range - log_divisor) > max_splits)
|
||||
log_divisor = log_range - max_splits;
|
||||
}
|
||||
return log_divisor;
|
||||
}
|
||||
|
||||
//Implementation for recursive integer sorting
|
||||
template <class RandomAccessIter, class Div_type, class Size_type>
|
||||
inline void
|
||||
spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes)
|
||||
{
|
||||
//This step is roughly 10% of runtime, but it helps avoid worst-case
|
||||
//behavior and improve behavior with real data
|
||||
//If you know the maximum and minimum ahead of time, you can pass those
|
||||
//values in and skip this step for the first iteration
|
||||
RandomAccessIter max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min))
|
||||
return;
|
||||
RandomAccessIter * target_bin;
|
||||
unsigned log_divisor = get_log_divisor<int_log_mean_bin_size>(
|
||||
last - first, rough_log_2_size(Size_type((*max >> 0) - (*min >> 0))));
|
||||
Div_type div_min = *min >> log_divisor;
|
||||
Div_type div_max = *max >> log_divisor;
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins =
|
||||
size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[size_t((*(current++) >> log_divisor) - div_min)]++;
|
||||
//Assign the bin positions
|
||||
bins[0] = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; u++)
|
||||
bins[u + 1] = bins[u] + bin_sizes[u];
|
||||
|
||||
RandomAccessIter nextbinstart = first;
|
||||
//Swap into place
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned u = 0; u < bin_count - 1; ++u) {
|
||||
RandomAccessIter * local_bin = bins + u;
|
||||
nextbinstart += bin_sizes[u];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < nextbinstart;
|
||||
++current) {
|
||||
//Swapping elements in current into place until the correct
|
||||
//element has been swapped in
|
||||
for (target_bin = (bins + ((*current >> log_divisor) - div_min));
|
||||
target_bin != local_bin;
|
||||
target_bin = bins + ((*current >> log_divisor) - div_min)) {
|
||||
//3-way swap; this is about 1% faster than a 2-way swap
|
||||
//The main advantage is less copies are involved per item
|
||||
//put in the correct place
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type tmp;
|
||||
RandomAccessIter b = (*target_bin)++;
|
||||
RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
|
||||
if (b_bin != local_bin) {
|
||||
RandomAccessIter c = (*b_bin)++;
|
||||
tmp = *c;
|
||||
*c = *b;
|
||||
}
|
||||
else
|
||||
tmp = *b;
|
||||
*b = *current;
|
||||
*current = tmp;
|
||||
}
|
||||
}
|
||||
*local_bin = nextbinstart;
|
||||
}
|
||||
bins[bin_count - 1] = last;
|
||||
|
||||
//If we've bucketsorted, the array is sorted and we should skip recursion
|
||||
if (!log_divisor)
|
||||
return;
|
||||
//log_divisor is the remaining range; calculating the comparison threshold
|
||||
size_t max_count =
|
||||
get_min_count<int_log_mean_bin_size, int_log_min_split_count,
|
||||
int_log_finishing_count>(log_divisor);
|
||||
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = first;
|
||||
for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
|
||||
++u) {
|
||||
Size_type count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u]);
|
||||
else
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Size_type>(lastPos,
|
||||
bin_cache[u],
|
||||
bin_cache,
|
||||
cache_end,
|
||||
bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//Generic bitshift-based 3-way swapping code
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline void inner_swap_loop(RandomAccessIter * bins,
|
||||
const RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
|
||||
, const unsigned log_divisor, const Div_type div_min)
|
||||
{
|
||||
RandomAccessIter * local_bin = bins + ii;
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
for (RandomAccessIter * target_bin =
|
||||
(bins + (rshift(*current, log_divisor) - div_min));
|
||||
target_bin != local_bin;
|
||||
target_bin = bins + (rshift(*current, log_divisor) - div_min)) {
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type tmp;
|
||||
RandomAccessIter b = (*target_bin)++;
|
||||
RandomAccessIter * b_bin =
|
||||
bins + (rshift(*b, log_divisor) - div_min);
|
||||
//Three-way swap; if the item to be swapped doesn't belong
|
||||
//in the current bin, swap it to where it belongs
|
||||
if (b_bin != local_bin) {
|
||||
RandomAccessIter c = (*b_bin)++;
|
||||
tmp = *c;
|
||||
*c = *b;
|
||||
}
|
||||
//Note: we could increment current once the swap is done in this case
|
||||
//but that seems to impair performance
|
||||
else
|
||||
tmp = *b;
|
||||
*b = *current;
|
||||
*current = tmp;
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
|
||||
//Standard swapping wrapper for ascending values
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline void swap_loop(RandomAccessIter * bins,
|
||||
RandomAccessIter & next_bin_start, unsigned ii, Right_shift &rshift
|
||||
, const size_t *bin_sizes
|
||||
, const unsigned log_divisor, const Div_type div_min)
|
||||
{
|
||||
next_bin_start += bin_sizes[ii];
|
||||
inner_swap_loop<RandomAccessIter, Div_type, Right_shift>(bins,
|
||||
next_bin_start, ii, rshift, log_divisor, div_min);
|
||||
}
|
||||
|
||||
//Functor implementation for recursive sorting
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare, class Size_type, unsigned log_mean_bin_size,
|
||||
unsigned log_min_split_count, unsigned log_finishing_count>
|
||||
inline void
|
||||
spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes, Right_shift rshift, Compare comp)
|
||||
{
|
||||
RandomAccessIter max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min, comp))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
|
||||
rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
|
||||
Div_type div_min = rshift(*min, log_divisor);
|
||||
Div_type div_max = rshift(*max, log_divisor);
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
bins[0] = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; u++)
|
||||
bins[u + 1] = bins[u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; ++u)
|
||||
swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, next_bin_start,
|
||||
u, rshift, bin_sizes, log_divisor, div_min);
|
||||
bins[bin_count - 1] = last;
|
||||
|
||||
//If we've bucketsorted, the array is sorted
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
|
||||
log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
|
||||
++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u], comp);
|
||||
else
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
Size_type, log_mean_bin_size, log_min_split_count, log_finishing_count>
|
||||
(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, rshift, comp);
|
||||
}
|
||||
}
|
||||
|
||||
//Functor implementation for recursive sorting with only Shift overridden
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Size_type, unsigned log_mean_bin_size,
|
||||
unsigned log_min_split_count, unsigned log_finishing_count>
|
||||
inline void
|
||||
spreadsort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
|
||||
, size_t *bin_sizes, Right_shift rshift)
|
||||
{
|
||||
RandomAccessIter max, min;
|
||||
if (is_sorted_or_find_extremes(first, last, max, min))
|
||||
return;
|
||||
unsigned log_divisor = get_log_divisor<log_mean_bin_size>(last - first,
|
||||
rough_log_2_size(Size_type(rshift(*max, 0) - rshift(*min, 0))));
|
||||
Div_type div_min = rshift(*min, log_divisor);
|
||||
Div_type div_max = rshift(*max, log_divisor);
|
||||
unsigned bin_count = unsigned(div_max - div_min) + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, bin_count);
|
||||
|
||||
//Calculating the size of each bin
|
||||
for (RandomAccessIter current = first; current != last;)
|
||||
bin_sizes[unsigned(rshift(*(current++), log_divisor) - div_min)]++;
|
||||
bins[0] = first;
|
||||
for (unsigned u = 0; u < bin_count - 1; u++)
|
||||
bins[u + 1] = bins[u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter nextbinstart = first;
|
||||
for (unsigned ii = 0; ii < bin_count - 1; ++ii)
|
||||
swap_loop<RandomAccessIter, Div_type, Right_shift>(bins, nextbinstart,
|
||||
ii, rshift, bin_sizes, log_divisor, div_min);
|
||||
bins[bin_count - 1] = last;
|
||||
|
||||
//If we've bucketsorted, the array is sorted
|
||||
if (!log_divisor)
|
||||
return;
|
||||
|
||||
//Recursing
|
||||
size_t max_count = get_min_count<log_mean_bin_size, log_min_split_count,
|
||||
log_finishing_count>(log_divisor);
|
||||
RandomAccessIter lastPos = first;
|
||||
for (unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u],
|
||||
++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
if (count < 2)
|
||||
continue;
|
||||
if (count < max_count)
|
||||
std::sort(lastPos, bin_cache[u]);
|
||||
else
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Size_type,
|
||||
log_mean_bin_size, log_min_split_count, log_finishing_count>(lastPos,
|
||||
bin_cache[u], bin_cache, cache_end, bin_sizes, rshift);
|
||||
}
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Div_type>
|
||||
//Only use spreadsort if the integer can fit in a size_t
|
||||
inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
|
||||
void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, size_t>(first, last,
|
||||
bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Div_type>
|
||||
//Only use spreadsort if the integer can fit in a uintmax_t
|
||||
inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
|
||||
&& sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, boost::uintmax_t>(first,
|
||||
last, bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type>
|
||||
inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
|
||||
|| sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
//defaulting to std::sort when integer_sort won't work
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type)
|
||||
{
|
||||
//Warning that we're using std::sort, even though integer_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
|
||||
std::sort(first, last);
|
||||
}
|
||||
|
||||
|
||||
//Same for the full functor version
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
//Only use spreadsort if the integer can fit in a size_t
|
||||
inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
|
||||
void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift, Compare comp)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
size_t, int_log_mean_bin_size, int_log_min_split_count,
|
||||
int_log_finishing_count>
|
||||
(first, last, bin_cache, 0, bin_sizes, shift, comp);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
//Only use spreadsort if the integer can fit in a uintmax_t
|
||||
inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
|
||||
&& sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift, Compare comp)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, Compare,
|
||||
boost::uintmax_t, int_log_mean_bin_size,
|
||||
int_log_min_split_count, int_log_finishing_count>
|
||||
(first, last, bin_cache, 0, bin_sizes, shift, comp);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift,
|
||||
class Compare>
|
||||
inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
|
||||
|| sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
//defaulting to std::sort when integer_sort won't work
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift, Compare comp)
|
||||
{
|
||||
//Warning that we're using std::sort, even though integer_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
|
||||
std::sort(first, last, comp);
|
||||
}
|
||||
|
||||
|
||||
//Same for the right shift version
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
//Only use spreadsort if the integer can fit in a size_t
|
||||
inline typename boost::enable_if_c< sizeof(Div_type) <= sizeof(size_t),
|
||||
void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift, size_t,
|
||||
int_log_mean_bin_size, int_log_min_split_count,
|
||||
int_log_finishing_count>
|
||||
(first, last, bin_cache, 0, bin_sizes, shift);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
//Only use spreadsort if the integer can fit in a uintmax_t
|
||||
inline typename boost::enable_if_c< (sizeof(Div_type) > sizeof(size_t))
|
||||
&& sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift)
|
||||
{
|
||||
size_t bin_sizes[1 << max_finishing_splits];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
spreadsort_rec<RandomAccessIter, Div_type, Right_shift,
|
||||
boost::uintmax_t, int_log_mean_bin_size,
|
||||
int_log_min_split_count, int_log_finishing_count>
|
||||
(first, last, bin_cache, 0, bin_sizes, shift);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Div_type, class Right_shift>
|
||||
inline typename boost::disable_if_c< sizeof(Div_type) <= sizeof(size_t)
|
||||
|| sizeof(Div_type) <= sizeof(boost::uintmax_t), void >::type
|
||||
//defaulting to std::sort when integer_sort won't work
|
||||
integer_sort(RandomAccessIter first, RandomAccessIter last, Div_type,
|
||||
Right_shift shift)
|
||||
{
|
||||
//Warning that we're using std::sort, even though integer_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Div_type) <= sizeof(size_t) );
|
||||
std::sort(first, last);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
+124
@@ -0,0 +1,124 @@
|
||||
// Contains get_min_count, the core optimization of the spreadsort algorithm.
|
||||
// Also has other helper functions commonly useful across variants.
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
*/
|
||||
|
||||
#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
|
||||
#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_COMMON_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <functional>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/serialization/static_warning.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
namespace detail {
|
||||
//This only works on unsigned data types
|
||||
template <typename T>
|
||||
inline unsigned
|
||||
rough_log_2_size(const T& input)
|
||||
{
|
||||
unsigned result = 0;
|
||||
//The && is necessary on some compilers to avoid infinite loops
|
||||
//it doesn't significantly impair performance
|
||||
while ((input >> result) && (result < (8*sizeof(T)))) ++result;
|
||||
return result;
|
||||
}
|
||||
|
||||
//Gets the minimum size to call spreadsort on to control worst-case runtime.
|
||||
//This is called for a set of bins, instead of bin-by-bin, to minimize
|
||||
//runtime overhead.
|
||||
//This could be replaced by a lookup table of sizeof(Div_type)*8 but this
|
||||
//function is more general.
|
||||
template<unsigned log_mean_bin_size,
|
||||
unsigned log_min_split_count, unsigned log_finishing_count>
|
||||
inline size_t
|
||||
get_min_count(unsigned log_range)
|
||||
{
|
||||
const size_t typed_one = 1;
|
||||
const unsigned min_size = log_mean_bin_size + log_min_split_count;
|
||||
//Assuring that constants have valid settings
|
||||
BOOST_STATIC_ASSERT(log_min_split_count <= max_splits &&
|
||||
log_min_split_count > 0);
|
||||
BOOST_STATIC_ASSERT(max_splits > 1 &&
|
||||
max_splits < (8 * sizeof(unsigned)));
|
||||
BOOST_STATIC_ASSERT(max_finishing_splits >= max_splits &&
|
||||
max_finishing_splits < (8 * sizeof(unsigned)));
|
||||
BOOST_STATIC_ASSERT(log_mean_bin_size >= 0);
|
||||
BOOST_STATIC_ASSERT(log_finishing_count >= 0);
|
||||
//if we can complete in one iteration, do so
|
||||
//This first check allows the compiler to optimize never-executed code out
|
||||
if (log_finishing_count < min_size) {
|
||||
if (log_range <= min_size && log_range <= max_splits) {
|
||||
//Return no smaller than a certain minimum limit
|
||||
if (log_range <= log_finishing_count)
|
||||
return typed_one << log_finishing_count;
|
||||
return typed_one << log_range;
|
||||
}
|
||||
}
|
||||
const unsigned base_iterations = max_splits - log_min_split_count;
|
||||
//sum of n to n + x = ((x + 1) * (n + (n + x)))/2 + log_mean_bin_size
|
||||
const unsigned base_range =
|
||||
((base_iterations + 1) * (max_splits + log_min_split_count))/2
|
||||
+ log_mean_bin_size;
|
||||
//Calculating the required number of iterations, and returning
|
||||
//1 << (iteration_count + min_size)
|
||||
if (log_range < base_range) {
|
||||
unsigned result = log_min_split_count;
|
||||
for (unsigned offset = min_size; offset < log_range;
|
||||
offset += ++result);
|
||||
//Preventing overflow; this situation shouldn't occur
|
||||
if ((result + log_mean_bin_size) >= (8 * sizeof(size_t)))
|
||||
return typed_one << ((8 * sizeof(size_t)) - 1);
|
||||
return typed_one << (result + log_mean_bin_size);
|
||||
}
|
||||
//A quick division can calculate the worst-case runtime for larger ranges
|
||||
unsigned remainder = log_range - base_range;
|
||||
//the max_splits - 1 is used to calculate the ceiling of the division
|
||||
unsigned bit_length = ((((max_splits - 1) + remainder)/max_splits)
|
||||
+ base_iterations + min_size);
|
||||
//Preventing overflow; this situation shouldn't occur
|
||||
if (bit_length >= (8 * sizeof(size_t)))
|
||||
return typed_one << ((8 * sizeof(size_t)) - 1);
|
||||
//n(log_range)/max_splits + C, optimizing worst-case performance
|
||||
return typed_one << bit_length;
|
||||
}
|
||||
|
||||
// Resizes the bin cache and bin sizes, and initializes each bin size to 0.
|
||||
// This generates the memory overhead to use in radix sorting.
|
||||
template <class RandomAccessIter>
|
||||
inline RandomAccessIter *
|
||||
size_bins(size_t *bin_sizes, std::vector<RandomAccessIter>
|
||||
&bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
|
||||
{
|
||||
// Clear the bin sizes
|
||||
for (size_t u = 0; u < bin_count; u++)
|
||||
bin_sizes[u] = 0;
|
||||
//Make sure there is space for the bins
|
||||
cache_end = cache_offset + bin_count;
|
||||
if (cache_end > bin_cache.size())
|
||||
bin_cache.resize(cache_end);
|
||||
return &(bin_cache[cache_offset]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,819 @@
|
||||
// Details for a templated general-case hybrid-radix string_sort.
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
*/
|
||||
|
||||
#ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
|
||||
#define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <functional>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/serialization/static_warning.hpp>
|
||||
#include <boost/utility/enable_if.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
|
||||
#include <boost/cstdint.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
namespace detail {
|
||||
static const int max_step_size = 64;
|
||||
|
||||
//Offsetting on identical characters. This function works a chunk of
|
||||
//characters at a time for cache efficiency and optimal worst-case
|
||||
//performance.
|
||||
template<class RandomAccessIter, class Unsigned_char_type>
|
||||
inline void
|
||||
update_offset(RandomAccessIter first, RandomAccessIter finish,
|
||||
size_t &char_offset)
|
||||
{
|
||||
const int char_size = sizeof(Unsigned_char_type);
|
||||
size_t nextOffset = char_offset;
|
||||
int step_size = max_step_size / char_size;
|
||||
while (true) {
|
||||
RandomAccessIter curr = first;
|
||||
do {
|
||||
//Ignore empties, but if the nextOffset would exceed the length or
|
||||
//not match, exit; we've found the last matching character
|
||||
//This will reduce the step_size if the current step doesn't match.
|
||||
if ((*curr).size() > char_offset) {
|
||||
if((*curr).size() <= (nextOffset + step_size)) {
|
||||
step_size = (*curr).size() - nextOffset - 1;
|
||||
if (step_size < 1) {
|
||||
char_offset = nextOffset;
|
||||
return;
|
||||
}
|
||||
}
|
||||
const int step_byte_size = step_size * char_size;
|
||||
if (memcmp(curr->data() + nextOffset, first->data() + nextOffset,
|
||||
step_byte_size) != 0) {
|
||||
if (step_size == 1) {
|
||||
char_offset = nextOffset;
|
||||
return;
|
||||
}
|
||||
step_size = (step_size > 4) ? 4 : 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
++curr;
|
||||
} while (curr != finish);
|
||||
nextOffset += step_size;
|
||||
}
|
||||
}
|
||||
|
||||
//Offsetting on identical characters. This function works a character
|
||||
//at a time for optimal worst-case performance.
|
||||
template<class RandomAccessIter, class Get_char, class Get_length>
|
||||
inline void
|
||||
update_offset(RandomAccessIter first, RandomAccessIter finish,
|
||||
size_t &char_offset, Get_char getchar, Get_length length)
|
||||
{
|
||||
size_t nextOffset = char_offset;
|
||||
while (true) {
|
||||
RandomAccessIter curr = first;
|
||||
do {
|
||||
//ignore empties, but if the nextOffset would exceed the length or
|
||||
//not match, exit; we've found the last matching character
|
||||
if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
|
||||
|| getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
|
||||
char_offset = nextOffset;
|
||||
return;
|
||||
}
|
||||
} while (++curr != finish);
|
||||
++nextOffset;
|
||||
}
|
||||
}
|
||||
|
||||
//This comparison functor assumes strings are identical up to char_offset
|
||||
template<class Data_type, class Unsigned_char_type>
|
||||
struct offset_less_than {
|
||||
offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
|
||||
inline bool operator()(const Data_type &x, const Data_type &y) const
|
||||
{
|
||||
size_t minSize = (std::min)(x.size(), y.size());
|
||||
for (size_t u = fchar_offset; u < minSize; ++u) {
|
||||
BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
|
||||
if (static_cast<Unsigned_char_type>(x[u]) !=
|
||||
static_cast<Unsigned_char_type>(y[u])) {
|
||||
return static_cast<Unsigned_char_type>(x[u]) <
|
||||
static_cast<Unsigned_char_type>(y[u]);
|
||||
}
|
||||
}
|
||||
return x.size() < y.size();
|
||||
}
|
||||
size_t fchar_offset;
|
||||
};
|
||||
|
||||
//Compares strings assuming they are identical up to char_offset
|
||||
template<class Data_type, class Unsigned_char_type>
|
||||
struct offset_greater_than {
|
||||
offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
|
||||
inline bool operator()(const Data_type &x, const Data_type &y) const
|
||||
{
|
||||
size_t minSize = (std::min)(x.size(), y.size());
|
||||
for (size_t u = fchar_offset; u < minSize; ++u) {
|
||||
BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
|
||||
if (static_cast<Unsigned_char_type>(x[u]) !=
|
||||
static_cast<Unsigned_char_type>(y[u])) {
|
||||
return static_cast<Unsigned_char_type>(x[u]) >
|
||||
static_cast<Unsigned_char_type>(y[u]);
|
||||
}
|
||||
}
|
||||
return x.size() > y.size();
|
||||
}
|
||||
size_t fchar_offset;
|
||||
};
|
||||
|
||||
//This comparison functor assumes strings are identical up to char_offset
|
||||
template<class Data_type, class Get_char, class Get_length>
|
||||
struct offset_char_less_than {
|
||||
offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
|
||||
inline bool operator()(const Data_type &x, const Data_type &y) const
|
||||
{
|
||||
size_t minSize = (std::min)(length(x), length(y));
|
||||
for (size_t u = fchar_offset; u < minSize; ++u) {
|
||||
if (getchar(x, u) != getchar(y, u)) {
|
||||
return getchar(x, u) < getchar(y, u);
|
||||
}
|
||||
}
|
||||
return length(x) < length(y);
|
||||
}
|
||||
size_t fchar_offset;
|
||||
Get_char getchar;
|
||||
Get_length length;
|
||||
};
|
||||
|
||||
//String sorting recursive implementation
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline void
|
||||
string_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
size_t char_offset,
|
||||
std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset, size_t *bin_sizes)
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIter>::value_type
|
||||
Data_type;
|
||||
//This section makes handling of long identical substrings much faster
|
||||
//with a mild average performance impact.
|
||||
//Iterate to the end of the empties. If all empty, return
|
||||
while ((*first).size() <= char_offset) {
|
||||
if (++first == last)
|
||||
return;
|
||||
}
|
||||
RandomAccessIter finish = last - 1;
|
||||
//Getting the last non-empty
|
||||
for (;(*finish).size() <= char_offset; --finish);
|
||||
++finish;
|
||||
//Offsetting on identical characters. This section works
|
||||
//a few characters at a time for optimal worst-case performance.
|
||||
update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
|
||||
char_offset);
|
||||
|
||||
const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
|
||||
//Equal worst-case of radix and comparison is when bin_count = n*log(n).
|
||||
const unsigned max_size = bin_count;
|
||||
const unsigned membin_count = bin_count + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, membin_count) + 1;
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last; ++current) {
|
||||
if ((*current).size() <= char_offset) {
|
||||
bin_sizes[0]++;
|
||||
}
|
||||
else
|
||||
bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
|
||||
+ 1]++;
|
||||
}
|
||||
//Assign the bin positions
|
||||
bin_cache[cache_offset] = first;
|
||||
for (unsigned u = 0; u < membin_count - 1; u++)
|
||||
bin_cache[cache_offset + u + 1] =
|
||||
bin_cache[cache_offset + u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
//handling empty bins
|
||||
RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
|
||||
next_bin_start += bin_sizes[0];
|
||||
RandomAccessIter * target_bin;
|
||||
//Iterating over each element in the bin of empties
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//empties belong in this bin
|
||||
while ((*current).size() > char_offset) {
|
||||
target_bin =
|
||||
bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
//iterate backwards to find the last bin with elements in it
|
||||
//this saves iterations in multiple loops
|
||||
unsigned last_bin = bin_count - 1;
|
||||
for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned u = 0; u < last_bin; ++u) {
|
||||
local_bin = bins + u;
|
||||
next_bin_start += bin_sizes[u + 1];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//Swapping into place until the correct element has been swapped in
|
||||
for (target_bin = bins + static_cast<Unsigned_char_type>
|
||||
((*current)[char_offset]); target_bin != local_bin;
|
||||
target_bin = bins + static_cast<Unsigned_char_type>
|
||||
((*current)[char_offset])) iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
bins[last_bin] = last;
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = bin_cache[cache_offset];
|
||||
//Skip this loop for empties
|
||||
for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_size)
|
||||
std::sort(lastPos, bin_cache[u],
|
||||
offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
|
||||
else
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
|
||||
bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//Sorts strings in reverse order, with empties at the end
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline void
|
||||
reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
size_t char_offset,
|
||||
std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset,
|
||||
size_t *bin_sizes)
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIter>::value_type
|
||||
Data_type;
|
||||
//This section makes handling of long identical substrings much faster
|
||||
//with a mild average performance impact.
|
||||
RandomAccessIter curr = first;
|
||||
//Iterate to the end of the empties. If all empty, return
|
||||
while ((*curr).size() <= char_offset) {
|
||||
if (++curr == last)
|
||||
return;
|
||||
}
|
||||
//Getting the last non-empty
|
||||
while ((*(--last)).size() <= char_offset);
|
||||
++last;
|
||||
//Offsetting on identical characters. This section works
|
||||
//a few characters at a time for optimal worst-case performance.
|
||||
update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
|
||||
char_offset);
|
||||
RandomAccessIter * target_bin;
|
||||
|
||||
const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
|
||||
//Equal worst-case of radix and comparison when bin_count = n*log(n).
|
||||
const unsigned max_size = bin_count;
|
||||
const unsigned membin_count = bin_count + 1;
|
||||
const unsigned max_bin = bin_count - 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, membin_count);
|
||||
RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last; ++current) {
|
||||
if ((*current).size() <= char_offset) {
|
||||
bin_sizes[bin_count]++;
|
||||
}
|
||||
else
|
||||
bin_sizes[max_bin - static_cast<Unsigned_char_type>
|
||||
((*current)[char_offset])]++;
|
||||
}
|
||||
//Assign the bin positions
|
||||
bin_cache[cache_offset] = first;
|
||||
for (unsigned u = 0; u < membin_count - 1; u++)
|
||||
bin_cache[cache_offset + u + 1] =
|
||||
bin_cache[cache_offset + u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = last;
|
||||
//handling empty bins
|
||||
RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
|
||||
RandomAccessIter lastFull = *local_bin;
|
||||
//Iterating over each element in the bin of empties
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//empties belong in this bin
|
||||
while ((*current).size() > char_offset) {
|
||||
target_bin =
|
||||
end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
next_bin_start = first;
|
||||
//iterate backwards to find the last non-empty bin
|
||||
//this saves iterations in multiple loops
|
||||
unsigned last_bin = max_bin;
|
||||
for (; last_bin && !bin_sizes[last_bin]; --last_bin);
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned u = 0; u < last_bin; ++u) {
|
||||
local_bin = bins + u;
|
||||
next_bin_start += bin_sizes[u];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//Swapping into place until the correct element has been swapped in
|
||||
for (target_bin =
|
||||
end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
|
||||
target_bin != local_bin;
|
||||
target_bin =
|
||||
end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
bins[last_bin] = lastFull;
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = first;
|
||||
//Skip this loop for empties
|
||||
for (unsigned u = cache_offset; u <= cache_offset + last_bin;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_size)
|
||||
std::sort(lastPos, bin_cache[u], offset_greater_than<Data_type,
|
||||
Unsigned_char_type>(char_offset + 1));
|
||||
else
|
||||
reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
|
||||
(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
|
||||
}
|
||||
}
|
||||
|
||||
//String sorting recursive implementation
|
||||
template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
|
||||
class Get_length>
|
||||
inline void
|
||||
string_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset, size_t *bin_sizes,
|
||||
Get_char getchar, Get_length length)
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIter>::value_type
|
||||
Data_type;
|
||||
//This section makes handling of long identical substrings much faster
|
||||
//with a mild average performance impact.
|
||||
//Iterate to the end of the empties. If all empty, return
|
||||
while (length(*first) <= char_offset) {
|
||||
if (++first == last)
|
||||
return;
|
||||
}
|
||||
RandomAccessIter finish = last - 1;
|
||||
//Getting the last non-empty
|
||||
for (;length(*finish) <= char_offset; --finish);
|
||||
++finish;
|
||||
update_offset(first, finish, char_offset, getchar, length);
|
||||
|
||||
const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
|
||||
//Equal worst-case of radix and comparison is when bin_count = n*log(n).
|
||||
const unsigned max_size = bin_count;
|
||||
const unsigned membin_count = bin_count + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, membin_count) + 1;
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last; ++current) {
|
||||
if (length(*current) <= char_offset) {
|
||||
bin_sizes[0]++;
|
||||
}
|
||||
else
|
||||
bin_sizes[getchar((*current), char_offset) + 1]++;
|
||||
}
|
||||
//Assign the bin positions
|
||||
bin_cache[cache_offset] = first;
|
||||
for (unsigned u = 0; u < membin_count - 1; u++)
|
||||
bin_cache[cache_offset + u + 1] =
|
||||
bin_cache[cache_offset + u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
//handling empty bins
|
||||
RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
|
||||
next_bin_start += bin_sizes[0];
|
||||
RandomAccessIter * target_bin;
|
||||
//Iterating over each element in the bin of empties
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//empties belong in this bin
|
||||
while (length(*current) > char_offset) {
|
||||
target_bin = bins + getchar((*current), char_offset);
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
//iterate backwards to find the last bin with elements in it
|
||||
//this saves iterations in multiple loops
|
||||
unsigned last_bin = bin_count - 1;
|
||||
for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned ii = 0; ii < last_bin; ++ii) {
|
||||
local_bin = bins + ii;
|
||||
next_bin_start += bin_sizes[ii + 1];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//Swapping into place until the correct element has been swapped in
|
||||
for (target_bin = bins + getchar((*current), char_offset);
|
||||
target_bin != local_bin;
|
||||
target_bin = bins + getchar((*current), char_offset))
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
bins[last_bin] = last;
|
||||
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = bin_cache[cache_offset];
|
||||
//Skip this loop for empties
|
||||
for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_size)
|
||||
std::sort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
|
||||
Get_char, Get_length>(char_offset + 1));
|
||||
else
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
|
||||
Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
|
||||
cache_end, bin_sizes, getchar, length);
|
||||
}
|
||||
}
|
||||
|
||||
//String sorting recursive implementation
|
||||
template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
|
||||
class Get_length, class Compare>
|
||||
inline void
|
||||
string_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset, size_t *bin_sizes,
|
||||
Get_char getchar, Get_length length, Compare comp)
|
||||
{
|
||||
//This section makes handling of long identical substrings much faster
|
||||
//with a mild average performance impact.
|
||||
//Iterate to the end of the empties. If all empty, return
|
||||
while (length(*first) <= char_offset) {
|
||||
if (++first == last)
|
||||
return;
|
||||
}
|
||||
RandomAccessIter finish = last - 1;
|
||||
//Getting the last non-empty
|
||||
for (;length(*finish) <= char_offset; --finish);
|
||||
++finish;
|
||||
update_offset(first, finish, char_offset, getchar, length);
|
||||
|
||||
const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
|
||||
//Equal worst-case of radix and comparison is when bin_count = n*log(n).
|
||||
const unsigned max_size = bin_count;
|
||||
const unsigned membin_count = bin_count + 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, membin_count) + 1;
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last; ++current) {
|
||||
if (length(*current) <= char_offset) {
|
||||
bin_sizes[0]++;
|
||||
}
|
||||
else
|
||||
bin_sizes[getchar((*current), char_offset) + 1]++;
|
||||
}
|
||||
//Assign the bin positions
|
||||
bin_cache[cache_offset] = first;
|
||||
for (unsigned u = 0; u < membin_count - 1; u++)
|
||||
bin_cache[cache_offset + u + 1] =
|
||||
bin_cache[cache_offset + u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = first;
|
||||
//handling empty bins
|
||||
RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
|
||||
next_bin_start += bin_sizes[0];
|
||||
RandomAccessIter * target_bin;
|
||||
//Iterating over each element in the bin of empties
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//empties belong in this bin
|
||||
while (length(*current) > char_offset) {
|
||||
target_bin = bins + getchar((*current), char_offset);
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
//iterate backwards to find the last bin with elements in it
|
||||
//this saves iterations in multiple loops
|
||||
unsigned last_bin = bin_count - 1;
|
||||
for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned u = 0; u < last_bin; ++u) {
|
||||
local_bin = bins + u;
|
||||
next_bin_start += bin_sizes[u + 1];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//Swapping into place until the correct element has been swapped in
|
||||
for (target_bin = bins + getchar((*current), char_offset);
|
||||
target_bin != local_bin;
|
||||
target_bin = bins + getchar((*current), char_offset))
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
bins[last_bin] = last;
|
||||
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = bin_cache[cache_offset];
|
||||
//Skip this loop for empties
|
||||
for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_size)
|
||||
std::sort(lastPos, bin_cache[u], comp);
|
||||
else
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
|
||||
Get_length, Compare>
|
||||
(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
|
||||
bin_sizes, getchar, length, comp);
|
||||
}
|
||||
}
|
||||
|
||||
//Sorts strings in reverse order, with empties at the end
|
||||
template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
|
||||
class Get_length, class Compare>
|
||||
inline void
|
||||
reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
|
||||
size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
|
||||
unsigned cache_offset, size_t *bin_sizes,
|
||||
Get_char getchar, Get_length length, Compare comp)
|
||||
{
|
||||
//This section makes handling of long identical substrings much faster
|
||||
//with a mild average performance impact.
|
||||
RandomAccessIter curr = first;
|
||||
//Iterate to the end of the empties. If all empty, return
|
||||
while (length(*curr) <= char_offset) {
|
||||
if (++curr == last)
|
||||
return;
|
||||
}
|
||||
//Getting the last non-empty
|
||||
while (length(*(--last)) <= char_offset);
|
||||
++last;
|
||||
//Offsetting on identical characters. This section works
|
||||
//a character at a time for optimal worst-case performance.
|
||||
update_offset(curr, last, char_offset, getchar, length);
|
||||
|
||||
const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
|
||||
//Equal worst-case of radix and comparison is when bin_count = n*log(n).
|
||||
const unsigned max_size = bin_count;
|
||||
const unsigned membin_count = bin_count + 1;
|
||||
const unsigned max_bin = bin_count - 1;
|
||||
unsigned cache_end;
|
||||
RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
|
||||
cache_end, membin_count);
|
||||
RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
|
||||
|
||||
//Calculating the size of each bin; this takes roughly 10% of runtime
|
||||
for (RandomAccessIter current = first; current != last; ++current) {
|
||||
if (length(*current) <= char_offset) {
|
||||
bin_sizes[bin_count]++;
|
||||
}
|
||||
else
|
||||
bin_sizes[max_bin - getchar((*current), char_offset)]++;
|
||||
}
|
||||
//Assign the bin positions
|
||||
bin_cache[cache_offset] = first;
|
||||
for (unsigned u = 0; u < membin_count - 1; u++)
|
||||
bin_cache[cache_offset + u + 1] =
|
||||
bin_cache[cache_offset + u] + bin_sizes[u];
|
||||
|
||||
//Swap into place
|
||||
RandomAccessIter next_bin_start = last;
|
||||
//handling empty bins
|
||||
RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
|
||||
RandomAccessIter lastFull = *local_bin;
|
||||
RandomAccessIter * target_bin;
|
||||
//Iterating over each element in the bin of empties
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//empties belong in this bin
|
||||
while (length(*current) > char_offset) {
|
||||
target_bin = end_bin - getchar((*current), char_offset);
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
next_bin_start = first;
|
||||
//iterate backwards to find the last bin with elements in it
|
||||
//this saves iterations in multiple loops
|
||||
unsigned last_bin = max_bin;
|
||||
for (; last_bin && !bin_sizes[last_bin]; --last_bin);
|
||||
//This dominates runtime, mostly in the swap and bin lookups
|
||||
for (unsigned u = 0; u < last_bin; ++u) {
|
||||
local_bin = bins + u;
|
||||
next_bin_start += bin_sizes[u];
|
||||
//Iterating over each element in this bin
|
||||
for (RandomAccessIter current = *local_bin; current < next_bin_start;
|
||||
++current) {
|
||||
//Swapping into place until the correct element has been swapped in
|
||||
for (target_bin = end_bin - getchar((*current), char_offset);
|
||||
target_bin != local_bin;
|
||||
target_bin = end_bin - getchar((*current), char_offset))
|
||||
iter_swap(current, (*target_bin)++);
|
||||
}
|
||||
*local_bin = next_bin_start;
|
||||
}
|
||||
bins[last_bin] = lastFull;
|
||||
//Recursing
|
||||
RandomAccessIter lastPos = first;
|
||||
//Skip this loop for empties
|
||||
for (unsigned u = cache_offset; u <= cache_offset + last_bin;
|
||||
lastPos = bin_cache[u], ++u) {
|
||||
size_t count = bin_cache[u] - lastPos;
|
||||
//don't sort unless there are at least two items to Compare
|
||||
if (count < 2)
|
||||
continue;
|
||||
//using std::sort if its worst-case is better
|
||||
if (count < max_size)
|
||||
std::sort(lastPos, bin_cache[u], comp);
|
||||
else
|
||||
reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
|
||||
Get_char, Get_length, Compare>
|
||||
(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
|
||||
bin_sizes, getchar, length, comp);
|
||||
}
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Unsigned_char_type)
|
||||
{
|
||||
size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type>
|
||||
(first, last, 0, bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Unsigned_char_type)
|
||||
{
|
||||
//Warning that we're using std::sort, even though string_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
|
||||
std::sort(first, last);
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Unsigned_char_type)
|
||||
{
|
||||
size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
|
||||
(first, last, 0, bin_cache, 0, bin_sizes);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Unsigned_char_type)
|
||||
{
|
||||
typedef typename std::iterator_traits<RandomAccessIter>::value_type
|
||||
Data_type;
|
||||
//Warning that we're using std::sort, even though string_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
|
||||
std::sort(first, last, std::greater<Data_type>());
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Unsigned_char_type)
|
||||
{
|
||||
size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
|
||||
Get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Unsigned_char_type>
|
||||
inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Unsigned_char_type)
|
||||
{
|
||||
//Warning that we're using std::sort, even though string_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
|
||||
std::sort(first, last);
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare, class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
|
||||
{
|
||||
size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
|
||||
, Get_length, Compare>
|
||||
(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
|
||||
}
|
||||
|
||||
//disable_if_c was refusing to compile, so rewrote to use enable_if_c
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare, class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
|
||||
>::type
|
||||
string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
|
||||
{
|
||||
//Warning that we're using std::sort, even though string_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
|
||||
std::sort(first, last, comp);
|
||||
}
|
||||
|
||||
//Holds the bin vector and makes the initial recursive call
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare, class Unsigned_char_type>
|
||||
inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
|
||||
{
|
||||
size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
|
||||
std::vector<RandomAccessIter> bin_cache;
|
||||
reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
|
||||
Get_length, Compare>
|
||||
(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
|
||||
}
|
||||
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare, class Unsigned_char_type>
|
||||
inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
|
||||
>::type
|
||||
reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Compare comp, Unsigned_char_type)
|
||||
{
|
||||
//Warning that we're using std::sort, even though string_sort was called
|
||||
BOOST_STATIC_WARNING( sizeof(Unsigned_char_type) <= 2 );
|
||||
std::sort(first, last, comp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,134 @@
|
||||
//Templated Spreadsort-based implementation of float_sort and float_mem_cast
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort/ for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
float_mem_cast fix provided by:
|
||||
Scott McMurray
|
||||
*/
|
||||
|
||||
#ifndef BOOST_FLOAT_SORT_HPP
|
||||
#define BOOST_FLOAT_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/float_sort.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
|
||||
/*!
|
||||
\brief Casts a float to the specified integer type.
|
||||
|
||||
\tparam Data_type Floating-point IEEE 754/IEC559 type.
|
||||
\tparam Cast_type Integer type (same size) to which to cast.
|
||||
|
||||
\par Example:
|
||||
\code
|
||||
struct rightshift {
|
||||
int operator()(const DATA_TYPE &x, const unsigned offset) const {
|
||||
return float_mem_cast<KEY_TYPE, CAST_TYPE>(x.key) >> offset;
|
||||
}
|
||||
};
|
||||
\endcode
|
||||
*/
|
||||
template<class Data_type, class Cast_type>
|
||||
inline Cast_type
|
||||
float_mem_cast(const Data_type & data)
|
||||
{
|
||||
// Only cast IEEE floating-point numbers, and only to a same-sized integer.
|
||||
BOOST_STATIC_ASSERT(sizeof(Cast_type) == sizeof(Data_type));
|
||||
BOOST_STATIC_ASSERT(std::numeric_limits<Data_type>::is_iec559);
|
||||
BOOST_STATIC_ASSERT(std::numeric_limits<Cast_type>::is_integer);
|
||||
Cast_type result;
|
||||
std::memcpy(&result, &data, sizeof(Cast_type));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
\brief @c float_sort with casting to the appropriate size.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_float_sort.htm"> windows_float_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_float_sort.htm"> osx_float_sort</a>
|
||||
|
||||
|
||||
|
||||
\par A simple example of sorting some floating-point is:
|
||||
\code
|
||||
vector<float> vec;
|
||||
vec.push_back(1.0);
|
||||
vec.push_back(2.3);
|
||||
vec.push_back(1.3);
|
||||
spreadsort(vec.begin(), vec.end());
|
||||
\endcode
|
||||
\par The sorted vector contains ascending values "1.0 1.3 2.3".
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter>
|
||||
inline void float_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else
|
||||
detail::float_sort(first, last);
|
||||
}
|
||||
|
||||
/*!
|
||||
\brief Floating-point sort algorithm using random access iterators with just right-shift functor.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter, class Right_shift>
|
||||
inline void float_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Right_shift rshift)
|
||||
{
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else
|
||||
detail::float_sort(first, last, rshift(*first, 0), rshift);
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
\brief Float sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] rshift Functor that returns the result of shifting the value_type right a specified number of bits.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
*/
|
||||
|
||||
template <class RandomAccessIter, class Right_shift, class Compare>
|
||||
inline void float_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Right_shift rshift, Compare comp)
|
||||
{
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last, comp);
|
||||
else
|
||||
detail::float_sort(first, last, rshift(*first, 0), rshift, comp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,185 @@
|
||||
//Templated Spreadsort-based implementation of integer_sort
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2014.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort/ for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
|
||||
Doxygen comments by Paul A. Bristow Jan 2015
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BOOST_INTEGER_SORT_HPP
|
||||
#define BOOST_INTEGER_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/integer_sort.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
//Top-level sorting call for integers.
|
||||
|
||||
|
||||
/*! \brief Integer sort algorithm using random access iterators.
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors, or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter>
|
||||
inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
// Don't sort if it's too small to optimize.
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else
|
||||
detail::integer_sort(first, last, *first >> 0);
|
||||
}
|
||||
|
||||
/*! \brief Integer sort algorithm using random access iterators with both right-shift and user-defined comparison operator.
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] shift Functor that returns the result of shifting the value_type right a specified number of bits.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
*/
|
||||
template <class RandomAccessIter, class Right_shift, class Compare>
|
||||
inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Right_shift shift, Compare comp) {
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last, comp);
|
||||
else
|
||||
detail::integer_sort(first, last, shift(*first, 0), shift, comp);
|
||||
}
|
||||
|
||||
/*! \brief Integer sort algorithm using random access iterators with just right-shift functor.
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
|
||||
\par Performance:
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
* <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>\n
|
||||
* <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] shift A functor that returns the result of shifting the value_type right a specified number of bits.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter, class Right_shift>
|
||||
inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Right_shift shift) {
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else
|
||||
detail::integer_sort(first, last, shift(*first, 0), shift);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
// Templated generic hybrid sorting
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2009.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort/ for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
float_mem_cast fix provided by:
|
||||
Scott McMurray
|
||||
*/
|
||||
|
||||
#ifndef BOOST_SORT_SPREADSORT_HPP
|
||||
#define BOOST_SORT_SPREADSORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
#include <boost/type_traits.hpp>
|
||||
#include <boost/sort/spreadsort/integer_sort.hpp>
|
||||
#include <boost/sort/spreadsort/float_sort.hpp>
|
||||
#include <boost/sort/spreadsort/string_sort.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
|
||||
/*! Namespace for spreadsort sort variants for different data types.
|
||||
\note Use hyperlinks (coloured) to get detailed information about functions.
|
||||
*/
|
||||
namespace spreadsort {
|
||||
|
||||
/*!
|
||||
\brief Generic @c spreadsort variant detecting integer-type elements so call to @c integer_sort.
|
||||
\details If the data type provided is an integer, @c integer_sort is used.
|
||||
\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
|
||||
as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
*/
|
||||
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c< std::numeric_limits<
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer,
|
||||
void >::type
|
||||
spreadsort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
integer_sort(first, last);
|
||||
}
|
||||
|
||||
/*!
|
||||
\brief Generic @c spreadsort variant detecting float element type so call to @c float_sort.
|
||||
\details If the data type provided is a float or castable-float, @c float_sort is used.
|
||||
\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
|
||||
as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
*/
|
||||
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c< !std::numeric_limits<
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type >::is_integer
|
||||
&& std::numeric_limits<
|
||||
typename std::iterator_traits<RandomAccessIter>::value_type >::is_iec559,
|
||||
void >::type
|
||||
spreadsort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
float_sort(first, last);
|
||||
}
|
||||
|
||||
/*!
|
||||
\brief Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::strings.
|
||||
\details If the data type provided is a string, @c string_sort is used.
|
||||
\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
|
||||
as @c spreadsort won't accept types that don't have the appropriate @c type_traits.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
*/
|
||||
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c<
|
||||
is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
|
||||
typename std::string>::value, void >::type
|
||||
spreadsort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
string_sort(first, last);
|
||||
}
|
||||
|
||||
/*!
|
||||
\brief Generic @c spreadsort variant detecting string element type so call to @c string_sort for @c std::wstrings.
|
||||
\details If the data type provided is a wstring, @c string_sort is used.
|
||||
\note Sorting other data types requires picking between @c integer_sort, @c float_sort and @c string_sort directly,
|
||||
as @c spreadsort won't accept types that don't have the appropriate @c type_traits. Also, 2-byte wide-characters are the limit above which string_sort is inefficient, so on platforms with wider characters, this will not accept wstrings.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
*/
|
||||
template <class RandomAccessIter>
|
||||
inline typename boost::enable_if_c<
|
||||
is_same<typename std::iterator_traits<RandomAccessIter>::value_type,
|
||||
typename std::wstring>::value &&
|
||||
sizeof(wchar_t) == 2, void >::type
|
||||
spreadsort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
boost::uint16_t unused = 0;
|
||||
string_sort(first, last, unused);
|
||||
}
|
||||
} // namespace spreadsort
|
||||
} // namespace sort
|
||||
} // namespace boost
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,449 @@
|
||||
//Templated hybrid string_sort
|
||||
|
||||
// Copyright Steven J. Ross 2001 - 2009.
|
||||
// Distributed under the Boost Software License, Version 1.0.
|
||||
// (See accompanying file LICENSE_1_0.txt or copy at
|
||||
// http://www.boost.org/LICENSE_1_0.txt)
|
||||
|
||||
// See http://www.boost.org/libs/sort/ for library home page.
|
||||
|
||||
/*
|
||||
Some improvements suggested by:
|
||||
Phil Endecott and Frank Gennari
|
||||
*/
|
||||
|
||||
#ifndef BOOST_STRING_SORT_HPP
|
||||
#define BOOST_STRING_SORT_HPP
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <boost/static_assert.hpp>
|
||||
#include <boost/sort/spreadsort/detail/constants.hpp>
|
||||
#include <boost/sort/spreadsort/detail/string_sort.hpp>
|
||||
|
||||
namespace boost {
|
||||
namespace sort {
|
||||
namespace spreadsort {
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
\par
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n
|
||||
<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
|
||||
|
||||
\tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
|
||||
\tparam Unsigned_char_type Unsigned character type used for string.
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
|
||||
template <class RandomAccessIter, class Unsigned_char_type>
|
||||
inline void string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Unsigned_char_type unused)
|
||||
{
|
||||
//Don't sort if it's too small to optimize
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else
|
||||
detail::string_sort(first, last, unused);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char.
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter>
|
||||
inline void string_sort(RandomAccessIter first, RandomAccessIter last)
|
||||
{
|
||||
unsigned char unused = '\0';
|
||||
string_sort(first, last, unused);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, allowing character-type overloads.
|
||||
|
||||
(All variants fall back to @c std::sort if the data size is too small, < detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
|
||||
\tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a>
|
||||
\tparam Comp Functor type to use for comparison.
|
||||
\tparam Unsigned_char_type Unsigned character type used for string.
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
\param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
*/
|
||||
template <class RandomAccessIter, class Compare, class Unsigned_char_type>
|
||||
inline void reverse_string_sort(RandomAccessIter first,
|
||||
RandomAccessIter last, Compare comp, Unsigned_char_type unused)
|
||||
{
|
||||
//Don't sort if it's too small to optimize.
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last, comp);
|
||||
else
|
||||
detail::reverse_string_sort(first, last, unused);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
|
||||
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
*/
|
||||
template <class RandomAccessIter, class Compare>
|
||||
inline void reverse_string_sort(RandomAccessIter first,
|
||||
RandomAccessIter last, Compare comp)
|
||||
{
|
||||
unsigned char unused = '\0';
|
||||
reverse_string_sort(first, last, comp, unused);
|
||||
}
|
||||
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
|
||||
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
|
||||
\param[in] length Functor to get the length of the string in characters.
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\pre @c RandomAccessIter @c value_type supports the @c operator>>,
|
||||
which returns an integer-type right-shifted a specified number of bits.
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter, class Get_char, class Get_length>
|
||||
inline void string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length)
|
||||
{
|
||||
//Don't sort if it's too small to optimize
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last);
|
||||
else {
|
||||
//skipping past empties, which allows us to get the character type
|
||||
//.empty() is not used so as not to require a user declaration of it
|
||||
while (!length(*first)) {
|
||||
if (++first == last)
|
||||
return;
|
||||
}
|
||||
detail::string_sort(first, last, getchar, length, getchar((*first), 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char.
|
||||
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
|
||||
\param[in] length Functor to get the length of the string in characters.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare>
|
||||
inline void string_sort(RandomAccessIter first, RandomAccessIter last,
|
||||
Get_char getchar, Get_length length, Compare comp)
|
||||
{
|
||||
//Don't sort if it's too small to optimize
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last, comp);
|
||||
else {
|
||||
//skipping past empties, which allows us to get the character type
|
||||
//.empty() is not used so as not to require a user declaration of it
|
||||
while (!length(*first)) {
|
||||
if (++first == last)
|
||||
return;
|
||||
}
|
||||
detail::string_sort(first, last, getchar, length, comp,
|
||||
getchar((*first), 0));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*! \brief Reverse String sort algorithm using random access iterators.
|
||||
|
||||
(All variants fall back to @c std::sort if the data size is too small, < @c detail::min_sort_size).
|
||||
|
||||
\details @c integer_sort is a fast templated in-place hybrid radix/comparison algorithm,
|
||||
which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n
|
||||
Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>,
|
||||
so @c integer_sort is asymptotically faster
|
||||
than pure comparison-based algorithms. @c s is @c max_splits, which defaults to 11,
|
||||
so its worst-case with default settings for 32-bit integers is
|
||||
<em> O(N * ((32/11) </em> slow radix-based iterations fast comparison-based iterations).\n\n
|
||||
Some performance plots of runtime vs. n and log(range) are provided:\n
|
||||
<a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a>
|
||||
\n
|
||||
<a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a>
|
||||
|
||||
|
||||
\param[in] first Iterator pointer to first element.
|
||||
\param[in] last Iterator pointing to one beyond the end of data.
|
||||
\param[in] getchar Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset.
|
||||
\param[in] length Functor to get the length of the string in characters.
|
||||
\param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
|
||||
|
||||
|
||||
\pre [@c first, @c last) is a valid range.
|
||||
\pre @c RandomAccessIter @c value_type is mutable.
|
||||
\pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
|
||||
\post The elements in the range [@c first, @c last) are sorted in ascending order.
|
||||
|
||||
\return @c void.
|
||||
|
||||
\throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves),
|
||||
the right shift, subtraction of right-shifted elements, functors,
|
||||
or any operations on iterators throw.
|
||||
|
||||
\warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss.
|
||||
\warning Invalid arguments cause undefined behaviour.
|
||||
\note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type,
|
||||
enabling faster generic-programming.
|
||||
|
||||
\remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where:
|
||||
\remark * N is @c last - @c first,
|
||||
\remark * K is the log of the range in bits (32 for 32-bit integers using their full range),
|
||||
\remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size).
|
||||
|
||||
*/
|
||||
template <class RandomAccessIter, class Get_char, class Get_length,
|
||||
class Compare>
|
||||
inline void reverse_string_sort(RandomAccessIter first,
|
||||
RandomAccessIter last, Get_char getchar, Get_length length, Compare comp)
|
||||
{
|
||||
//Don't sort if it's too small to optimize
|
||||
if (last - first < detail::min_sort_size)
|
||||
std::sort(first, last, comp);
|
||||
else {
|
||||
//skipping past empties, which allows us to get the character type
|
||||
//.empty() is not used so as not to require a user declaration of it
|
||||
while (!length(*(--last))) {
|
||||
//If there is just one non-empty at the beginning, this is sorted
|
||||
if (first == last)
|
||||
return;
|
||||
}
|
||||
//making last just after the end of the non-empty part of the array
|
||||
detail::reverse_string_sort(first, last + 1, getchar, length, comp,
|
||||
getchar((*last), 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user