1//----------------------------------------------------------------------------
2/// @file block_indirect_sort.hpp
3/// @brief block indirect sort algorithm
4///
5/// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
6/// Distributed under the Boost Software License, Version 1.0.\n
7/// ( See accompanying file LICENSE_1_0.txt or copy at
8/// http://www.boost.org/LICENSE_1_0.txt )
9/// @version 0.1
10///
11/// @remarks
12//-----------------------------------------------------------------------------
13#ifndef __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
14#define __BOOST_SORT_PARALLEL_DETAIL_BLOCK_INDIRECT_SORT_HPP
15
16#include <ciso646>
17#include <atomic>
18#include <cstdlib>
19#include <future>
20#include <iterator>
21
22#include <boost/sort/block_indirect_sort/blk_detail/merge_blocks.hpp>
23#include <boost/sort/block_indirect_sort/blk_detail/move_blocks.hpp>
24#include <boost/sort/block_indirect_sort/blk_detail/parallel_sort.hpp>
25#include <boost/sort/pdqsort/pdqsort.hpp>
26#include <boost/sort/common/util/traits.hpp>
27#include <boost/sort/common/util/algorithm.hpp>
28
29
30// This value is the minimal number of threads for to use the
31// block_indirect_sort algorithm
32#define BOOST_NTHREAD_BORDER 6
33
34namespace boost
35{
36namespace sort
37{
38namespace blk_detail
39{
40//---------------------------------------------------------------------------
41// USING SENTENCES
42//---------------------------------------------------------------------------
43namespace bs = boost::sort;
44namespace bsc = bs::common;
45namespace bscu = bsc::util;
46using bscu::compare_iter;
47using bscu::value_iter;
48using bsc::range;
49using bsc::destroy;
50using bsc::initialize;
51using bscu::nbits64;
52using bs::pdqsort;
53using bscu::enable_if_string;
54using bscu::enable_if_not_string;
55using bscu::tmsb;
56//
57///---------------------------------------------------------------------------
58/// @struct block_indirect_sort
59/// @brief This class is the entry point of the block indirect sort. The code
60/// of this algorithm is divided in several classes:
61/// bis/block.hpp : basic structures used in the algorithm
62/// bis/backbone.hpp : data used by all the classes
63/// bis/merge_blocks.hpp : merge the internal blocks
64/// bis/move_blocks.hpp : move the blocks, and obtain all the elements
65/// phisicaly sorted
66/// bis/parallel_sort.hpp : make the parallel sort of each part in the
67/// initial division of the data
68///
69//----------------------------------------------------------------------------
70template<uint32_t Block_size, uint32_t Group_size, class Iter_t,
71 class Compare = compare_iter<Iter_t> >
72struct block_indirect_sort
73{
74 //------------------------------------------------------------------------
75 // D E F I N I T I O N S
76 //------------------------------------------------------------------------
77 typedef typename std::iterator_traits<Iter_t>::value_type value_t;
78 typedef std::atomic<uint32_t> atomic_t;
79 typedef range<size_t> range_pos;
80 typedef range<Iter_t> range_it;
81 typedef range<value_t *> range_buf;
82 typedef std::function<void(void)> function_t;
83
84 // classes used in the internal operations of the algorithm
85 typedef block_pos block_pos_t;
86 typedef block<Block_size, Iter_t> block_t;
87 typedef backbone<Block_size, Iter_t, Compare> backbone_t;
88 typedef parallel_sort<Block_size, Iter_t, Compare> parallel_sort_t;
89
90 typedef merge_blocks<Block_size, Group_size, Iter_t, Compare> merge_blocks_t;
91 typedef move_blocks<Block_size, Group_size, Iter_t, Compare> move_blocks_t;
92 typedef compare_block_pos<Block_size, Iter_t, Compare> compare_block_pos_t;
93 //
94 //------------------------------------------------------------------------
95 // V A R I A B L E S A N D C O N S T A N T S
96 //------------------------------------------------------------------------
97 // contains the data and the internal data structures of the algorithm for
98 // to be shared between the classes which are part of the algorithm
99 backbone_t bk;
100 // atomic counter for to detect the end of the works created inside
101 // the object
102 atomic_t counter;
103 // pointer to the uninitialized memory used for the thread buffers
104 value_t *ptr;
105 // indicate if the memory pointed by ptr is initialized
106 bool construct;
107 // range from extract the buffers for the threads
108 range_buf rglobal_buf;
109 // number of threads to use
110 uint32_t nthread;
111 //
112 //------------------------------------------------------------------------
113 // F U N C T I O N S
114 //------------------------------------------------------------------------
115
116 block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr);
117
118 block_indirect_sort(Iter_t first, Iter_t last) :
119 block_indirect_sort(first, last, Compare(),
120 std::thread::hardware_concurrency()) { }
121
122
123 block_indirect_sort(Iter_t first, Iter_t last, Compare cmp) :
124 block_indirect_sort(first, last, cmp,
125 std::thread::hardware_concurrency()) { }
126
127
128 block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread) :
129 block_indirect_sort(first, last, Compare(), nthread){}
130
131
132 //
133 //------------------------------------------------------------------------
134 // function :destroy_all
135 /// @brief destructor all the data structures of the class (if the memory
136 /// is constructed, is destroyed) and return the uninitialized
137 /// memory
138 //------------------------------------------------------------------------
139 void destroy_all(void)
140 {
141 if (ptr != nullptr)
142 {
143 if (construct)
144 {
145 destroy(rglobal_buf);
146 construct = false;
147 };
148 std::free (ptr: ptr);
149 ptr = nullptr;
150 };
151 }
152 //
153 //------------------------------------------------------------------------
154 // function :~block_indirect_sort
155 /// @brief destructor of the class (if the memory is constructed, is
156 /// destroyed) and return the uninitialized memory
157 //------------------------------------------------------------------------
158 ~block_indirect_sort(void)
159 {
160 destroy_all();
161 }
162
163 void split_range(size_t pos_index1, size_t pos_index2,
164 uint32_t level_thread);
165
166 void start_function(void);
167
168//-------------------------------------------------------------------------
169}; // End class block_indirect_sort
170//----------------------------------------------------------------------------
171//
172//############################################################################
173// ##
174// ##
175// N O N I N L I N E F U N C T I O N S ##
176// ##
177// ##
178//############################################################################
179//
180//-------------------------------------------------------------------------
181// function : block_indirect_sort
182/// @brief begin with the execution of the functions stored in works
183/// @param first : iterator to the first element of the range to sort
184/// @param last : iterator after the last element to the range to sort
185/// @param comp : object for to compare two elements pointed by Iter_t
186/// iterators
187/// @param nthr : Number of threads to use in the process.When this value
188/// is lower than 2, the sorting is done with 1 thread
189//-------------------------------------------------------------------------
190template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
191block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
192::block_indirect_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t nthr)
193: bk(first, last, cmp), counter(0), ptr(nullptr), construct(false),
194 nthread(nthr)
195{
196 try
197 {
198 assert((last - first) >= 0);
199 size_t nelem = size_t(last - first);
200 if (nelem == 0) return;
201
202 //------------------- check if sort -----------------------------------
203 bool sorted = true;
204 for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
205 not bk.cmp(*it2, *it1)); it1 = it2++);
206 if (sorted) return;
207
208 //------------------- check if reverse sort ---------------------------
209 sorted = true;
210 for (Iter_t it1 = first, it2 = first + 1; it2 != last and (sorted =
211 not bk.cmp(*it1, *it2)); it1 = it2++);
212
213 if (sorted)
214 {
215 size_t nelem2 = nelem >> 1;
216 Iter_t it1 = first, it2 = last - 1;
217 for (size_t i = 0; i < nelem2; ++i)
218 {
219 using std::swap;
220 swap(*(it1++), *(it2--));
221 };
222 return;
223 };
224
225 //---------------- check if only single thread -----------------------
226 size_t nthreadmax = nelem / (Block_size * Group_size) + 1;
227 if (nthread > nthreadmax) nthread = (uint32_t) nthreadmax;
228
229 uint32_t nbits_size = (nbits64(num: sizeof(value_t)) >> 1);
230 if (nbits_size > 5) nbits_size = 5;
231 size_t max_per_thread = (size_t) 1 << (18 - nbits_size);
232
233 if (nelem < (max_per_thread) or nthread < 2)
234 {
235 //intro_sort (first, last, bk.cmp);
236 pdqsort(first, last, bk.cmp);
237 return;
238 };
239
240 //----------- creation of the temporary buffer --------------------
241 ptr = reinterpret_cast <value_t*>
242 (std::malloc (size: Block_size * nthread * sizeof(value_t)));
243 if (ptr == nullptr)
244 {
245 bk.error = true;
246 throw std::bad_alloc();
247 };
248
249 rglobal_buf = range_buf(ptr, ptr + (Block_size * nthread));
250 initialize(rglobal_buf, *first);
251 construct = true;
252
253 // creation of the buffers for the threads
254 std::vector<value_t *> vbuf(nthread);
255 for (uint32_t i = 0; i < nthread; ++i)
256 {
257 vbuf[i] = ptr + (i * Block_size);
258 };
259
260 // Insert the first work in the stack
261 bscu::atomic_write(at_var&: counter, num: 1);
262 function_t f1 = [&]( )
263 {
264 start_function ( );
265 bscu::atomic_sub (at_var&: counter, num: 1);
266 };
267 bk.works.emplace_back(f1);
268
269 //---------------------------------------------------------------------
270 // PROCESS
271 //---------------------------------------------------------------------
272 std::vector<std::future<void> > vfuture(nthread);
273
274 // The function launched with the futures is "execute the functions of
275 // the stack until this->counter is zero
276 // vbuf[i] is the memory from the main thread for to configure the
277 // thread local buffer
278 for (uint32_t i = 0; i < nthread; ++i)
279 {
280 auto f1 = [&vbuf,i, this]( )
281 { bk.exec (vbuf[i], this->counter);};
282 vfuture[i] = std::async(std::launch::async, f1);
283 };
284 for (uint32_t i = 0; i < nthread; ++i)
285 vfuture[i].get();
286 if (bk.error) throw std::bad_alloc();
287 }
288 catch (std::bad_alloc &)
289 {
290 destroy_all();
291 throw;
292 }
293};
294//
295//-----------------------------------------------------------------------------
296// function : split_rage
297/// @brief this function splits a range of positions in the index, and
298/// depending of the size, sort directly or make to a recursive call
299/// to split_range
300/// @param pos_index1 : first position in the index
301/// @param pos_index2 : position after the last in the index
302/// @param level_thread : depth of the call. When 0 sort the blocks
303//-----------------------------------------------------------------------------
304template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
305void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
306::split_range(size_t pos_index1, size_t pos_index2, uint32_t level_thread)
307{
308 size_t nblock = pos_index2 - pos_index1;
309
310 //-------------------------------------------------------------------------
311 // In the blocks not sorted, the physical position is the logical position
312 //-------------------------------------------------------------------------
313 Iter_t first = bk.get_block(pos_index1).first;
314 Iter_t last = bk.get_range(pos_index2 - 1).last;
315
316 if (nblock < Group_size)
317 {
318 pdqsort(first, last, bk.cmp);
319 return;
320 };
321
322 size_t pos_index_mid = pos_index1 + (nblock >> 1);
323 atomic_t son_counter(1);
324
325 //-------------------------------------------------------------------------
326 // Insert in the stack the work for the second part, and the actual thread,
327 // execute the first part
328 //-------------------------------------------------------------------------
329 if (level_thread != 0)
330 {
331 auto f1 = [this, &son_counter, pos_index_mid,
332 pos_index2, level_thread]( )
333 {
334 split_range (pos_index1: pos_index_mid, pos_index2, level_thread: level_thread - 1);
335 bscu::atomic_sub (at_var&: son_counter, num: 1);
336 };
337 bk.works.emplace_back(f1);
338 if (bk.error) return;
339 split_range(pos_index1, pos_index2: pos_index_mid, level_thread: level_thread - 1);
340 }
341 else
342 {
343 Iter_t mid = first + ((nblock >> 1) * Block_size);
344 auto f1 = [this, &son_counter, mid, last]( )
345 {
346 parallel_sort_t (this->bk, mid, last);
347 bscu::atomic_sub (at_var&: son_counter, num: 1);
348 };
349 bk.works.emplace_back(f1);
350 if (bk.error) return;
351 parallel_sort_t(bk, first, mid);
352 };
353 bk.exec(son_counter);
354 if (bk.error) return;
355 merge_blocks_t(bk, pos_index1, pos_index_mid, pos_index2);
356};
357
358//
359//-----------------------------------------------------------------------------
360// function : start_function
361/// @brief this function init the process. When the number of threads is lower
362/// than a predefined value, sort the elements with a parallel pdqsort.
363//-----------------------------------------------------------------------------
364template<uint32_t Block_size, uint32_t Group_size, class Iter_t, class Compare>
365void block_indirect_sort<Block_size, Group_size, Iter_t, Compare>
366::start_function(void)
367{
368 if (nthread < BOOST_NTHREAD_BORDER)
369 {
370 parallel_sort_t(bk, bk.global_range.first, bk.global_range.last);
371 }
372 else
373 {
374 uint32_t level_thread = nbits64 (num: nthread - 1) - 1;
375 split_range(pos_index1: 0, pos_index2: bk.nblock, level_thread: level_thread - 1);
376 if (bk.error) return;
377 move_blocks_t k(bk);
378 };
379};
380
381///---------------------------------------------------------------------------
382// function block_indirect_sort_call
383/// @brief This class is select the block size in the block_indirect_sort
384/// algorithm depending of the type and size of the data to sort
385///
386//----------------------------------------------------------------------------
387template <class Iter_t, class Compare,
388 enable_if_string<value_iter<Iter_t>> * = nullptr>
389inline void block_indirect_sort_call(Iter_t first, Iter_t last, Compare cmp,
390 uint32_t nthr)
391{
392 block_indirect_sort<128, 128, Iter_t, Compare>(first, last, cmp, nthr);
393};
394
395template<size_t Size>
396struct block_size
397{
398 static constexpr const uint32_t BitsSize =
399 (Size == 0) ? 0 : (Size > 256) ? 9 : tmsb[Size - 1];
400 static constexpr const uint32_t sz[10] =
401 { 4096, 4096, 4096, 4096, 2048, 1024, 768, 512, 256, 128 };
402 static constexpr const uint32_t data = sz[BitsSize];
403};
404//
405///---------------------------------------------------------------------------
406/// @struct block_indirect_sort_call
407/// @brief This class is select the block size in the block_indirect_sort
408/// algorithm depending of the type and size of the data to sort
409///
410//----------------------------------------------------------------------------
411template <class Iter_t, class Compare,
412 enable_if_not_string<value_iter<Iter_t>> * = nullptr>
413inline void block_indirect_sort_call (Iter_t first, Iter_t last, Compare cmp,
414 uint32_t nthr)
415{
416 block_indirect_sort<block_size<sizeof (value_iter<Iter_t> )>::data, 64,
417 Iter_t, Compare> (first, last, cmp, nthr);
418};
419
420//
421//****************************************************************************
422}; // End namespace blk_detail
423//****************************************************************************
424//
425namespace bscu = boost::sort::common::util;
426//
427//############################################################################
428// ##
429// ##
430// B L O C K _ I N D I R E C T _ S O R T ##
431// ##
432// ##
433//############################################################################
434//
435//-----------------------------------------------------------------------------
436// function : block_indirect_sort
437/// @brief invocation of block_indirtect_sort with 2 parameters
438///
439/// @param first : iterator to the first element of the range to sort
440/// @param last : iterator after the last element to the range to sort
441//-----------------------------------------------------------------------------
442template<class Iter_t>
443void block_indirect_sort(Iter_t first, Iter_t last)
444{
445 typedef bscu::compare_iter<Iter_t> Compare;
446 blk_detail::block_indirect_sort_call (first, last, Compare(),
447 std::thread::hardware_concurrency());
448}
449
450//
451//-----------------------------------------------------------------------------
452// function : block_indirect_sort
453/// @brief invocation of block_indirtect_sort with 3 parameters. The third is
454/// the number of threads
455///
456/// @param first : iterator to the first element of the range to sort
457/// @param last : iterator after the last element to the range to sort
458/// @param nthread : Number of threads to use in the process. When this value
459/// is lower than 2, the sorting is done with 1 thread
460//-----------------------------------------------------------------------------
461template<class Iter_t>
462void block_indirect_sort(Iter_t first, Iter_t last, uint32_t nthread)
463{
464 typedef bscu::compare_iter<Iter_t> Compare;
465 blk_detail::block_indirect_sort_call(first, last, Compare(), nthread);
466}
467//
468//-----------------------------------------------------------------------------
469// function : block_indirect_sort
470/// @brief invocation of block_indirtect_sort with 3 parameters. The third is
471/// the comparison object
472///
473/// @param first : iterator to the first element of the range to sort
474/// @param last : iterator after the last element to the range to sort
475/// @param comp : object for to compare two elements pointed by Iter_t
476/// iterators
477//-----------------------------------------------------------------------------
478template <class Iter_t, class Compare,
479 bscu::enable_if_not_integral<Compare> * = nullptr>
480void block_indirect_sort(Iter_t first, Iter_t last, Compare comp)
481{
482 blk_detail::block_indirect_sort_call (first, last, comp,
483 std::thread::hardware_concurrency());
484}
485
486//
487//-----------------------------------------------------------------------------
488// function : block_indirect_sort
489/// @brief invocation of block_indirtect_sort with 4 parameters.
490///
491/// @param first : iterator to the first element of the range to sort
492/// @param last : iterator after the last element to the range to sort
493/// @param comp : object for to compare two elements pointed by Iter_t
494/// iterators
495/// @param nthread : Number of threads to use in the process. When this value
496/// is lower than 2, the sorting is done with 1 thread
497//-----------------------------------------------------------------------------
498template<class Iter_t, class Compare>
499void block_indirect_sort (Iter_t first, Iter_t last, Compare comp,
500 uint32_t nthread)
501{
502 blk_detail::block_indirect_sort_call(first, last, comp, nthread);
503}
504//
505//****************************************************************************
506}; // End namespace sort
507}; // End namespace boost
508//****************************************************************************
509//
510#endif
511

source code of boost/libs/sort/include/boost/sort/block_indirect_sort/block_indirect_sort.hpp