1 | /* |
2 | * The code of this file was taken from http://jeffreystedfast.blogspot.be, |
3 | * where it was posted in 2011 by Jeffrey Stedfast under the MIT license. |
4 | * The MIT license text is as follows: |
5 | * |
6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
7 | * of this software and associated documentation files (the "Software"), to |
8 | * deal in the Software without restriction, including without limitation the |
9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
10 | * sell copies of the Software, and to permit persons to whom the Software is |
11 | * furnished to do so, subject to the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be included in |
14 | * all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
22 | * IN THE SOFTWARE. |
23 | */ |
24 | |
25 | #include <errno.h> |
26 | #include <string.h> |
27 | #include <stdlib.h> |
28 | #include <isl_sort.h> |
29 | |
30 | #define MID(lo, hi) (lo + ((hi - lo) >> 1)) |
31 | |
32 | /* The code here is an optimized merge sort. Starting from a generic merge sort |
33 | * the following optimizations were applied: |
34 | * |
35 | * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and |
36 | * every element into a temporary buffer, blocks of elements are copied |
37 | * at a time. |
38 | * |
39 | * o To reduce the number of memcpy() calls further, copying leading |
40 | * and trailing elements into our temporary buffer is avoided, in case it is |
41 | * not necessary to merge them. |
42 | * |
43 | * A further optimization could be to specialize memcpy calls based on the |
44 | * size of the types we compare. For now, this code does not include the |
45 | * relevant optimization, as clang e.g. inlines a very efficient memcpy() |
46 | * implementation. It is not clear, that the specialized version as provided in |
47 | * the blog post, is really superior to the one that will be inlined by |
48 | * default. So we decided to keep the code simple until this optimization was |
49 | * proven to be beneficial. |
50 | */ |
51 | |
52 | static void |
53 | msort (void *array, void *buf, size_t low, size_t high, size_t size, |
54 | int (* compare) (const void *, const void *, void *), void *arg) |
55 | { |
56 | char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b; |
57 | size_t copied = 0; |
58 | size_t mid; |
59 | |
60 | mid = MID (low, high); |
61 | |
62 | if (mid + 1 < high) |
63 | msort (array, buf, low: mid + 1, high, size, compare, arg); |
64 | |
65 | if (mid > low) |
66 | msort (array, buf, low, high: mid, size, compare, arg); |
67 | |
68 | ah = ((char *) array) + ((high + 1) * size); |
69 | am = ((char *) array) + ((mid + 1) * size); |
70 | a1 = al = ((char *) array) + (low * size); |
71 | |
72 | b = (char *) buf; |
73 | lo = al; |
74 | hi = am; |
75 | |
76 | do { |
77 | ls = lo; |
78 | hs = hi; |
79 | |
80 | if (lo > al || hi > am) { |
81 | /* our last loop already compared lo & hi and found lo <= hi */ |
82 | lo += size; |
83 | } |
84 | |
85 | while (lo < am && compare (lo, hi, arg) <= 0) |
86 | lo += size; |
87 | |
88 | if (lo < am) { |
89 | if (copied == 0) { |
90 | /* avoid copying the leading items */ |
91 | a1 = lo; |
92 | ls = lo; |
93 | } |
94 | |
95 | /* our last compare tells us hi < lo */ |
96 | hi += size; |
97 | |
98 | while (hi < ah && compare (hi, lo, arg) < 0) |
99 | hi += size; |
100 | |
101 | if (lo > ls) { |
102 | memcpy (dest: b, src: ls, n: lo - ls); |
103 | copied += (lo - ls); |
104 | b += (lo - ls); |
105 | } |
106 | |
107 | memcpy (dest: b, src: hs, n: hi - hs); |
108 | copied += (hi - hs); |
109 | b += (hi - hs); |
110 | } else if (copied) { |
111 | memcpy (dest: b, src: ls, n: lo - ls); |
112 | copied += (lo - ls); |
113 | b += (lo - ls); |
114 | |
115 | /* copy everything we needed to re-order back into array */ |
116 | memcpy (dest: a1, src: buf, n: copied); |
117 | return; |
118 | } else { |
119 | /* everything already in order */ |
120 | return; |
121 | } |
122 | } while (hi < ah); |
123 | |
124 | if (lo < am) { |
125 | memcpy (dest: b, src: lo, n: am - lo); |
126 | copied += (am - lo); |
127 | } |
128 | |
129 | memcpy (dest: a1, src: buf, n: copied); |
130 | } |
131 | |
132 | static int |
133 | MergeSort (void *base, size_t nmemb, size_t size, |
134 | int (* compare) (const void *, const void *, void *), void *arg) |
135 | { |
136 | void *tmp; |
137 | |
138 | if (nmemb < 2) |
139 | return 0; |
140 | |
141 | if (!(tmp = malloc (size: nmemb * size))) { |
142 | errno = ENOMEM; |
143 | return -1; |
144 | } |
145 | |
146 | msort (array: base, buf: tmp, low: 0, high: nmemb - 1, size, compare, arg); |
147 | |
148 | free (ptr: tmp); |
149 | |
150 | return 0; |
151 | } |
152 | |
153 | int isl_sort(void *const pbase, size_t total_elems, size_t size, |
154 | int (*cmp)(const void *, const void *, void *arg), void *arg) |
155 | { |
156 | return MergeSort (base: pbase, nmemb: total_elems, size, compare: cmp, arg); |
157 | } |
158 | |