1/*
2 * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3 * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4 * The MIT license text is as follows:
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25#include <errno.h>
26#include <string.h>
27#include <stdlib.h>
28#include <isl_sort.h>
29
30#define MID(lo, hi) (lo + ((hi - lo) >> 1))
31
32/* The code here is an optimized merge sort. Starting from a generic merge sort
33 * the following optimizations were applied:
34 *
35 * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36 * every element into a temporary buffer, blocks of elements are copied
37 * at a time.
38 *
39 * o To reduce the number of memcpy() calls further, copying leading
40 * and trailing elements into our temporary buffer is avoided, in case it is
41 * not necessary to merge them.
42 *
43 * A further optimization could be to specialize memcpy calls based on the
44 * size of the types we compare. For now, this code does not include the
45 * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46 * implementation. It is not clear, that the specialized version as provided in
47 * the blog post, is really superior to the one that will be inlined by
48 * default. So we decided to keep the code simple until this optimization was
49 * proven to be beneficial.
50 */
51
52static void
53msort (void *array, void *buf, size_t low, size_t high, size_t size,
54 int (* compare) (const void *, const void *, void *), void *arg)
55{
56 char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57 size_t copied = 0;
58 size_t mid;
59
60 mid = MID (low, high);
61
62 if (mid + 1 < high)
63 msort (array, buf, low: mid + 1, high, size, compare, arg);
64
65 if (mid > low)
66 msort (array, buf, low, high: mid, size, compare, arg);
67
68 ah = ((char *) array) + ((high + 1) * size);
69 am = ((char *) array) + ((mid + 1) * size);
70 a1 = al = ((char *) array) + (low * size);
71
72 b = (char *) buf;
73 lo = al;
74 hi = am;
75
76 do {
77 ls = lo;
78 hs = hi;
79
80 if (lo > al || hi > am) {
81 /* our last loop already compared lo & hi and found lo <= hi */
82 lo += size;
83 }
84
85 while (lo < am && compare (lo, hi, arg) <= 0)
86 lo += size;
87
88 if (lo < am) {
89 if (copied == 0) {
90 /* avoid copying the leading items */
91 a1 = lo;
92 ls = lo;
93 }
94
95 /* our last compare tells us hi < lo */
96 hi += size;
97
98 while (hi < ah && compare (hi, lo, arg) < 0)
99 hi += size;
100
101 if (lo > ls) {
102 memcpy (dest: b, src: ls, n: lo - ls);
103 copied += (lo - ls);
104 b += (lo - ls);
105 }
106
107 memcpy (dest: b, src: hs, n: hi - hs);
108 copied += (hi - hs);
109 b += (hi - hs);
110 } else if (copied) {
111 memcpy (dest: b, src: ls, n: lo - ls);
112 copied += (lo - ls);
113 b += (lo - ls);
114
115 /* copy everything we needed to re-order back into array */
116 memcpy (dest: a1, src: buf, n: copied);
117 return;
118 } else {
119 /* everything already in order */
120 return;
121 }
122 } while (hi < ah);
123
124 if (lo < am) {
125 memcpy (dest: b, src: lo, n: am - lo);
126 copied += (am - lo);
127 }
128
129 memcpy (dest: a1, src: buf, n: copied);
130}
131
132static int
133MergeSort (void *base, size_t nmemb, size_t size,
134 int (* compare) (const void *, const void *, void *), void *arg)
135{
136 void *tmp;
137
138 if (nmemb < 2)
139 return 0;
140
141 if (!(tmp = malloc (size: nmemb * size))) {
142 errno = ENOMEM;
143 return -1;
144 }
145
146 msort (array: base, buf: tmp, low: 0, high: nmemb - 1, size, compare, arg);
147
148 free (ptr: tmp);
149
150 return 0;
151}
152
153int isl_sort(void *const pbase, size_t total_elems, size_t size,
154 int (*cmp)(const void *, const void *, void *arg), void *arg)
155{
156 return MergeSort (base: pbase, nmemb: total_elems, size, compare: cmp, arg);
157}
158

source code of polly/lib/External/isl/isl_sort.c