1/* Lots of code for an adaptive, stable, natural mergesort. There are many
2 * pieces to this algorithm; read listsort.txt for overviews and details.
3 */
4
5#include "config.h"
6
7#include "gtktimsortprivate.h"
8
9/*
10 * This is the minimum sized sequence that will be merged. Shorter
11 * sequences will be lengthened by calling binarySort. If the entire
12 * array is less than this length, no merges will be performed.
13 *
14 * This constant should be a power of two. It was 64 in Tim Peter's C
15 * implementation, but 32 was empirically determined to work better in
16 * [Android's Java] implementation. In the unlikely event that you set
17 * this constant to be a number that's not a power of two, you'll need
18 * to change the compute_min_run() computation.
19 *
20 * If you decrease this constant, you must change the
21 * GTK_TIM_SORT_MAX_PENDING value, or you risk running out of space.
22 * See Python's listsort.txt for a discussion of the minimum stack
23 * length required as a function of the length of the array being sorted and
24 * the minimum merge sequence length.
25 */
26#define MIN_MERGE 32
27
28/*
29 * When we get into galloping mode, we stay there until both runs win less
30 * often than MIN_GALLOP consecutive times.
31 */
32#define MIN_GALLOP 7
33
34/*
35 * Returns the minimum acceptable run length for an array of the specified
36 * length. Natural runs shorter than this will be extended with binary sort.
37 *
38 * Roughly speaking, the computation is:
39 *
40 * If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
41 * Else if n is an exact power of 2, return MIN_MERGE/2.
42 * Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
43 * is close to, but strictly less than, an exact power of 2.
44 *
45 * For the rationale, see listsort.txt.
46 *
47 * @param n the length of the array to be sorted
48 * @return the length of the minimum run to be merged
49 */
50static gsize
51compute_min_run (gsize n)
52{
53 gsize r = 0; // Becomes 1 if any 1 bits are shifted off
54
55 while (n >= MIN_MERGE) {
56 r |= (n & 1);
57 n >>= 1;
58 }
59 return n + r;
60}
61
62void
63gtk_tim_sort_init (GtkTimSort *self,
64 gpointer base,
65 gsize size,
66 gsize element_size,
67 GCompareDataFunc compare_func,
68 gpointer data)
69{
70 self->element_size = element_size;
71 self->base = base;
72 self->size = size;
73 self->compare_func = compare_func;
74 self->data = data;
75
76 self->min_gallop = MIN_GALLOP;
77 self->max_merge_size = G_MAXSIZE;
78 self->min_run = compute_min_run (n: size);
79
80 self->tmp = NULL;
81 self->tmp_length = 0;
82 self->pending_runs = 0;
83}
84
85void
86gtk_tim_sort_finish (GtkTimSort *self)
87{
88 g_clear_pointer (&self->tmp, g_free);
89}
90
91void
92gtk_tim_sort (gpointer base,
93 gsize size,
94 gsize element_size,
95 GCompareDataFunc compare_func,
96 gpointer user_data)
97{
98 GtkTimSort self;
99
100 gtk_tim_sort_init (self: &self, base, size, element_size, compare_func, data: user_data);
101
102 while (gtk_tim_sort_step (self: &self, NULL));
103
104 gtk_tim_sort_finish (self: &self);
105}
106
107static inline int
108gtk_tim_sort_compare (GtkTimSort *self,
109 gpointer a,
110 gpointer b)
111{
112 return self->compare_func (a, b, self->data);
113}
114
115
116/**
117 * Pushes the specified run onto the pending-run stack.
118 *
119 * @param runBase index of the first element in the run
120 * @param runLen the number of elements in the run
121 */
122static void
123gtk_tim_sort_push_run (GtkTimSort *self,
124 void *base,
125 gsize len)
126{
127 g_assert (self->pending_runs < GTK_TIM_SORT_MAX_PENDING);
128 g_assert (len <= self->size);
129
130 self->run[self->pending_runs].base = base;
131 self->run[self->pending_runs].len = len;
132 self->pending_runs++;
133
134 /* Advance to find next run */
135 self->base = ((char *) self->base) + len * self->element_size;
136 self->size -= len;
137}
138
139/**
140 * Ensures that the external array tmp has at least the specified
141 * number of elements, increasing its size if necessary. The size
142 * increases exponentially to ensure amortized linear time complexity.
143 *
144 * @param min_capacity the minimum required capacity of the tmp array
145 * @return tmp, whether or not it grew
146 */
147static gpointer
148gtk_tim_sort_ensure_capacity (GtkTimSort *self,
149 gsize min_capacity)
150{
151 if (self->tmp_length < min_capacity)
152 {
153 /* Compute smallest power of 2 > min_capacity */
154 gsize new_size = min_capacity;
155 new_size |= new_size >> 1;
156 new_size |= new_size >> 2;
157 new_size |= new_size >> 4;
158 new_size |= new_size >> 8;
159 new_size |= new_size >> 16;
160 if (sizeof(new_size) > 4)
161 new_size |= new_size >> 32;
162
163 new_size++;
164 if (new_size == 0) /* (overflow) Not bloody likely! */
165 new_size = min_capacity;
166
167 g_free (mem: self->tmp);
168 self->tmp_length = new_size;
169 self->tmp = g_malloc (n_bytes: self->tmp_length * self->element_size);
170 }
171
172 return self->tmp;
173}
174
175static void
176gtk_tim_sort_set_change (GtkTimSortRun *out_change,
177 gpointer base,
178 gsize len)
179{
180 if (out_change)
181 {
182 out_change->base = base;
183 out_change->len = len;
184 }
185}
186
187/*<private>
188 * gtk_tim_sort_get_runs:
189 * @self: a GtkTimSort
190 * @runs: (out) (caller-allocates): Place to store the 0-terminated list of
191 * runs
192 *
193 * Stores the already presorted list of runs - ranges of items that are
194 * known to be sorted among themselves.
195 *
196 * This can be used with gtk_tim_sort_set_runs() when resuming a sort later.
197 **/
198void
199gtk_tim_sort_get_runs (GtkTimSort *self,
200 gsize runs[GTK_TIM_SORT_MAX_PENDING + 1])
201{
202 gsize i;
203
204 g_return_if_fail (self);
205 g_return_if_fail (runs);
206
207 for (i = 0; i < self->pending_runs; i++)
208 runs[i] = self->run[i].len;
209
210 runs[self->pending_runs] = 0;
211}
212
213/*<private>
214 * gtk_tim_sort_set_runs:
215 * @self: a freshly initialized GtkTimSort
216 * @runs: (array length=zero-terminated): a 0-terminated list of runs
217 *
218 * Sets the list of runs. A run is a range of items that are already
219 * sorted correctly among themselves. Runs must appear at the beginning of
220 * the array.
221 *
222 * Runs can only be set at the beginning of the sort operation.
223 **/
224void
225gtk_tim_sort_set_runs (GtkTimSort *self,
226 gsize *runs)
227{
228 gsize i;
229
230 g_return_if_fail (self);
231 g_return_if_fail (self->pending_runs == 0);
232
233 for (i = 0; runs[i] != 0; i++)
234 gtk_tim_sort_push_run (self, base: self->base, len: runs[i]);
235}
236
237/*
238 * gtk_tim_sort_set_max_merge_size:
239 * @self: a GtkTimSort
240 * @max_merge_size: Maximum size of a merge step, 0 for unlimited
241 *
242 * Sets the maximum size of a merge step. Every time
243 * gtk_tim_sort_step() is called and a merge operation has to be
244 * done, the @max_merge_size will be used to limit the size of
245 * the merge.
246 *
247 * The benefit is that merges happen faster, and if you're using
248 * an incremental sorting algorithm in the main thread, this will
249 * limit the runtime.
250 *
251 * The disadvantage is that setting up merges is expensive and that
252 * various optimizations benefit from larger merges, so the total
253 * runtime of the sorting will increase with the number of merges.
254 *
255 * A good estimate is to set a @max_merge_size to 1024 for around
256 * 1ms runtimes, if your compare function is fast.
257 *
258 * By default, max_merge_size is set to unlimited.
259 **/
260void
261gtk_tim_sort_set_max_merge_size (GtkTimSort *self,
262 gsize max_merge_size)
263{
264 g_return_if_fail (self != NULL);
265
266 if (max_merge_size == 0)
267 max_merge_size = G_MAXSIZE;
268 self->max_merge_size = max_merge_size;
269}
270
271/**
272 * gtk_tim_sort_get_progress:
273 * @self: a GtkTimSort
274 *
275 * Does a progress estimate about sort progress, estimates relative
276 * to the number of items to sort.
277 *
278 * Note that this is entirely a progress estimate and does not have
279 * a relationship with items put in their correct place.
280 * It is also an estimate, so no guarantees are made about accuracy,
281 * other than that it will only report 100% completion when it is
282 * indeed done sorting.
283 *
284 * To get a percentage, you need to divide this number by the total
285 * number of elements that are being sorted.
286 *
287 * Returns: Rough guess of sort progress
288 **/
289gsize
290gtk_tim_sort_get_progress (GtkTimSort *self)
291{
292#define DEPTH 4
293 gsize i;
294 gsize last, progress;
295
296 g_return_val_if_fail (self != NULL, 0);
297
298 if (self->pending_runs == 0)
299 return 0;
300
301 last = self->run[0].len;
302 progress = 0;
303
304 for (i = 1; i < DEPTH + 1 && i < self->pending_runs; i++)
305 {
306 progress += (DEPTH + 1 - i) * MAX (last, self->run[i].len);
307 last = MIN (last, self->run[i].len);
308 }
309 if (i < DEPTH + 1)
310 progress += (DEPTH + 1 - i) * last;
311
312 return progress / DEPTH;
313#undef DEPTH
314}
315
316#if 1
317#define WIDTH 4
318#include "gtktimsort-impl.c"
319
320#define WIDTH 8
321#include "gtktimsort-impl.c"
322
323#define WIDTH 16
324#include "gtktimsort-impl.c"
325#endif
326
327#define NAME default
328#define WIDTH (self->element_size)
329#include "gtktimsort-impl.c"
330
331/*
332 * gtk_tim_sort_step:
333 * @self: a GtkTimSort
334 * @out_change: (optional): Return location for changed
335 * area. If a change did not cause any changes (for example,
336 * if an already sorted array gets sorted), out_change
337 * will be set to %NULL and 0.
338 *
339 * Performs another step in the sorting process. If a
340 * step was performed, %TRUE is returned and @out_change is
341 * set to the smallest area that contains all changes while
342 * sorting.
343 *
344 * If the data is completely sorted, %FALSE will be
345 * returned.
346 *
347 * Returns: %TRUE if an action was performed
348 **/
349gboolean
350gtk_tim_sort_step (GtkTimSort *self,
351 GtkTimSortRun *out_change)
352{
353 gboolean result;
354
355 g_assert (self);
356
357 switch (self->element_size)
358 {
359 case 4:
360 result = gtk_tim_sort_step_4 (self, out_change);
361 break;
362 case 8:
363 result = gtk_tim_sort_step_8 (self, out_change);
364 break;
365 case 16:
366 result = gtk_tim_sort_step_16 (self, out_change);
367 break;
368 default:
369 result = gtk_tim_sort_step_default (self, out_change);
370 break;
371 }
372
373 return result;
374}
375
376

source code of gtk/gtk/timsort/gtktimsort.c