gtktimsort.c source code [gtk/gtk/timsort/gtktimsort.c]

1	/ Lots of code for an adaptive, stable, natural mergesort. There are many*
2	* pieces to this algorithm; read listsort.txt for overviews and details.
3	*/
4
5	#include "config.h"
6
7	#include "gtktimsortprivate.h"
8
9	/*
10	* This is the minimum sized sequence that will be merged. Shorter
11	* sequences will be lengthened by calling binarySort. If the entire
12	* array is less than this length, no merges will be performed.
13	*
14	* This constant should be a power of two. It was 64 in Tim Peter's C
15	* implementation, but 32 was empirically determined to work better in
16	* [Android's Java] implementation. In the unlikely event that you set
17	* this constant to be a number that's not a power of two, you'll need
18	* to change the compute_min_run() computation.
19	*
20	* If you decrease this constant, you must change the
21	* GTK_TIM_SORT_MAX_PENDING value, or you risk running out of space.
22	* See Python's listsort.txt for a discussion of the minimum stack
23	* length required as a function of the length of the array being sorted and
24	* the minimum merge sequence length.
25	*/
26	#define MIN_MERGE 32
27
28	/*
29	* When we get into galloping mode, we stay there until both runs win less
30	* often than MIN_GALLOP consecutive times.
31	*/
32	#define MIN_GALLOP 7
33
34	/*
35	* Returns the minimum acceptable run length for an array of the specified
36	* length. Natural runs shorter than this will be extended with binary sort.
37	*
38	* Roughly speaking, the computation is:
39	*
40	* If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
41	* Else if n is an exact power of 2, return MIN_MERGE/2.
42	* Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
43	* is close to, but strictly less than, an exact power of 2.
44	*
45	* For the rationale, see listsort.txt.
46	*
47	* @param n the length of the array to be sorted
48	* @return the length of the minimum run to be merged
49	*/
50	static gsize
51	compute_min_run (gsize n)
52	{
53	gsize r = `0`; // Becomes 1 if any 1 bits are shifted off
54
55	while (n >= MIN_MERGE) {
56	r \|= (n & `1`);
57	n >>= `1`;
58	}
59	return n + r;
60	}
61
62	void
63	gtk_tim_sort_init (GtkTimSort *self,
64	gpointer base,
65	gsize size,
66	gsize element_size,
67	GCompareDataFunc compare_func,
68	gpointer data)
69	{
70	self->element_size = element_size;
71	self->base = base;
72	self->size = size;
73	self->compare_func = compare_func;
74	self->data = data;
75
76	self->min_gallop = MIN_GALLOP;
77	self->max_merge_size = G_MAXSIZE;
78	self->min_run = compute_min_run (n: size);
79
80	self->tmp = NULL;
81	self->tmp_length = `0`;
82	self->pending_runs = `0`;
83	}
84
85	void
86	gtk_tim_sort_finish (GtkTimSort *self)
87	{
88	g_clear_pointer (&self->tmp, g_free);
89	}
90
91	void
92	gtk_tim_sort (gpointer base,
93	gsize size,
94	gsize element_size,
95	GCompareDataFunc compare_func,
96	gpointer user_data)
97	{
98	GtkTimSort self;
99
100	gtk_tim_sort_init (self: &self, base, size, element_size, compare_func, data: user_data);
101
102	while (gtk_tim_sort_step (self: &self, NULL));
103
104	gtk_tim_sort_finish (self: &self);
105	}
106
107	static inline int
108	gtk_tim_sort_compare (GtkTimSort *self,
109	gpointer a,
110	gpointer b)
111	{
112	return self->compare_func (a, b, self->data);
113	}
114
115
116	/**
117	* Pushes the specified run onto the pending-run stack.
118	*
119	* @param runBase index of the first element in the run
120	* @param runLen the number of elements in the run
121	*/
122	static void
123	gtk_tim_sort_push_run (GtkTimSort *self,
124	void *base,
125	gsize len)
126	{
127	g_assert (self->pending_runs < GTK_TIM_SORT_MAX_PENDING);
128	g_assert (len <= self->size);
129
130	self->run[self->pending_runs].base = base;
131	self->run[self->pending_runs].len = len;
132	self->pending_runs++;
133
134	/ Advance to find next run /
135	self->base = ((char ) self->base) + len self->element_size;
136	self->size -= len;
137	}
138
139	/**
140	* Ensures that the external array tmp has at least the specified
141	* number of elements, increasing its size if necessary. The size
142	* increases exponentially to ensure amortized linear time complexity.
143	*
144	* @param min_capacity the minimum required capacity of the tmp array
145	* @return tmp, whether or not it grew
146	*/
147	static gpointer
148	gtk_tim_sort_ensure_capacity (GtkTimSort *self,
149	gsize min_capacity)
150	{
151	if (self->tmp_length < min_capacity)
152	{
153	/ Compute smallest power of 2 > min_capacity /
154	gsize new_size = min_capacity;
155	new_size \|= new_size >> `1`;
156	new_size \|= new_size >> `2`;
157	new_size \|= new_size >> `4`;
158	new_size \|= new_size >> `8`;
159	new_size \|= new_size >> `16`;
160	if (sizeof(new_size) > `4`)
161	new_size \|= new_size >> `32`;
162
163	new_size++;
164	if (new_size == `0`) / (overflow) Not bloody likely! /
165	new_size = min_capacity;
166
167	g_free (mem: self->tmp);
168	self->tmp_length = new_size;
169	self->tmp = g_malloc (n_bytes: self->tmp_length * self->element_size);
170	}
171
172	return self->tmp;
173	}
174
175	static void
176	gtk_tim_sort_set_change (GtkTimSortRun *out_change,
177	gpointer base,
178	gsize len)
179	{
180	if (out_change)
181	{
182	out_change->base = base;
183	out_change->len = len;
184	}
185	}
186
187	/<private>*
188	* gtk_tim_sort_get_runs:
189	* @self: a GtkTimSort
190	* @runs: (out) (caller-allocates): Place to store the 0-terminated list of
191	* runs
192	*
193	* Stores the already presorted list of runs - ranges of items that are
194	* known to be sorted among themselves.
195	*
196	* This can be used with gtk_tim_sort_set_runs() when resuming a sort later.
197	**/
198	void
199	gtk_tim_sort_get_runs (GtkTimSort *self,
200	gsize runs[GTK_TIM_SORT_MAX_PENDING + `1`])
201	{
202	gsize i;
203
204	g_return_if_fail (self);
205	g_return_if_fail (runs);
206
207	for (i = `0`; i < self->pending_runs; i++)
208	runs[i] = self->run[i].len;
209
210	runs[self->pending_runs] = `0`;
211	}
212
213	/<private>*
214	* gtk_tim_sort_set_runs:
215	* @self: a freshly initialized GtkTimSort
216	* @runs: (array length=zero-terminated): a 0-terminated list of runs
217	*
218	* Sets the list of runs. A run is a range of items that are already
219	* sorted correctly among themselves. Runs must appear at the beginning of
220	* the array.
221	*
222	* Runs can only be set at the beginning of the sort operation.
223	**/
224	void
225	gtk_tim_sort_set_runs (GtkTimSort *self,
226	gsize *runs)
227	{
228	gsize i;
229
230	g_return_if_fail (self);
231	g_return_if_fail (self->pending_runs == `0`);
232
233	for (i = `0`; runs[i] != `0`; i++)
234	gtk_tim_sort_push_run (self, base: self->base, len: runs[i]);
235	}
236
237	/*
238	* gtk_tim_sort_set_max_merge_size:
239	* @self: a GtkTimSort
240	* @max_merge_size: Maximum size of a merge step, 0 for unlimited
241	*
242	* Sets the maximum size of a merge step. Every time
243	* gtk_tim_sort_step() is called and a merge operation has to be
244	* done, the @max_merge_size will be used to limit the size of
245	* the merge.
246	*
247	* The benefit is that merges happen faster, and if you're using
248	* an incremental sorting algorithm in the main thread, this will
249	* limit the runtime.
250	*
251	* The disadvantage is that setting up merges is expensive and that
252	* various optimizations benefit from larger merges, so the total
253	* runtime of the sorting will increase with the number of merges.
254	*
255	* A good estimate is to set a @max_merge_size to 1024 for around
256	* 1ms runtimes, if your compare function is fast.
257	*
258	* By default, max_merge_size is set to unlimited.
259	**/
260	void
261	gtk_tim_sort_set_max_merge_size (GtkTimSort *self,
262	gsize max_merge_size)
263	{
264	g_return_if_fail (self != NULL);
265
266	if (max_merge_size == `0`)
267	max_merge_size = G_MAXSIZE;
268	self->max_merge_size = max_merge_size;
269	}
270
271	/**
272	* gtk_tim_sort_get_progress:
273	* @self: a GtkTimSort
274	*
275	* Does a progress estimate about sort progress, estimates relative
276	* to the number of items to sort.
277	*
278	* Note that this is entirely a progress estimate and does not have
279	* a relationship with items put in their correct place.
280	* It is also an estimate, so no guarantees are made about accuracy,
281	* other than that it will only report 100% completion when it is
282	* indeed done sorting.
283	*
284	* To get a percentage, you need to divide this number by the total
285	* number of elements that are being sorted.
286	*
287	* Returns: Rough guess of sort progress
288	**/
289	gsize
290	gtk_tim_sort_get_progress (GtkTimSort *self)
291	{
292	#define DEPTH 4
293	gsize i;
294	gsize last, progress;
295
296	g_return_val_if_fail (self != NULL, `0`);
297
298	if (self->pending_runs == `0`)
299	return `0`;
300
301	last = self->run[`0`].len;
302	progress = `0`;
303
304	for (i = `1`; i < DEPTH + `1` && i < self->pending_runs; i++)
305	{
306	progress += (DEPTH + `1` - i) * MAX (last, self->run[i].len);
307	last = MIN (last, self->run[i].len);
308	}
309	if (i < DEPTH + `1`)
310	progress += (DEPTH + `1` - i) * last;
311
312	return progress / DEPTH;
313	#undef DEPTH
314	}
315
316	#if 1
317	#define WIDTH 4
318	#include "gtktimsort-impl.c"
319
320	#define WIDTH 8
321	#include "gtktimsort-impl.c"
322
323	#define WIDTH 16
324	#include "gtktimsort-impl.c"
325	#endif
326
327	#define NAME default
328	#define WIDTH (self->element_size)
329	#include "gtktimsort-impl.c"
330
331	/*
332	* gtk_tim_sort_step:
333	* @self: a GtkTimSort
334	* @out_change: (optional): Return location for changed
335	* area. If a change did not cause any changes (for example,
336	* if an already sorted array gets sorted), out_change
337	* will be set to %NULL and 0.
338	*
339	* Performs another step in the sorting process. If a
340	* step was performed, %TRUE is returned and @out_change is
341	* set to the smallest area that contains all changes while
342	* sorting.
343	*
344	* If the data is completely sorted, %FALSE will be
345	* returned.
346	*
347	* Returns: %TRUE if an action was performed
348	**/
349	gboolean
350	gtk_tim_sort_step (GtkTimSort *self,
351	GtkTimSortRun *out_change)
352	{
353	gboolean result;
354
355	g_assert (self);
356
357	switch (self->element_size)
358	{
359	case `4`:
360	result = gtk_tim_sort_step_4 (self, out_change);
361	break;
362	case `8`:
363	result = gtk_tim_sort_step_8 (self, out_change);
364	break;
365	case `16`:
366	result = gtk_tim_sort_step_16 (self, out_change);
367	break;
368	default:
369	result = gtk_tim_sort_step_default (self, out_change);
370	break;
371	}
372
373	return result;
374	}
375
376

source code of gtk/gtk/timsort/gtktimsort.c