1/*
2 * Amalgamated copy of CRoaring 0.2.66, modified for GTK to reduce compiler
3 * warnings.
4 *
5 * Copyright 2016-2020 The CRoaring authors
6 * Copyright 2020 Benjamin Otte
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 * SPDX-License-Identifier: Apache-2.0
21 */
22
23#include "roaring.h"
24
25/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
26#ifdef DMALLOC
27#include "dmalloc.h"
28#endif
29
30/* begin file src/array_util.c */
31#include <assert.h>
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37
38
39#ifdef USESSE4
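// Byte-shuffle control masks for _mm_shuffle_epi8, stored as consecutive
// 16-byte entries indexed by an 8-bit lane mask r: entry r lists the byte
// positions of the 16-bit lanes whose bit is set in r, packed to the front in
// ascending order, and pads the remainder with 0xFF.
// Used by intersect_vector16 and difference_vector16.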
41ALIGNED(0x1000)
42static const uint8_t shuffle_mask16[] = {
43 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
44 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
45 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF,
46 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
47 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
48 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
49 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
50 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
51 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
52 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF,
53 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF,
54 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
55 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
56 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
57 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
58 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
59 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
60 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF,
61 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
62 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
63 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
64 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
65 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
66 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
67 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
68 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF,
69 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
70 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
71 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
72 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF,
73 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
74 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
75 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
76 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF,
77 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
78 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
79 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
80 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF,
81 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
82 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
83 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
84 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
85 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF,
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
87 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
88 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
89 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
90 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
91 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
92 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF,
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
94 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
95 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
96 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
98 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
99 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
100 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
101 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
102 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
103 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
104 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
105 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
106 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
107 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
108 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF,
109 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
110 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
111 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
112 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF,
113 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
114 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
115 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
116 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
117 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
118 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
119 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
120 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
121 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
122 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
123 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
124 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
125 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
126 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
127 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
128 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
129 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13,
130 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
131 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
132 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF,
133 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13,
134 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
135 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
136 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF,
137 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
138 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
139 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
140 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF,
141 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
142 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
143 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
144 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF,
145 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
146 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
147 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
148 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
149 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13,
150 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
151 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
152 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF,
153 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
154 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
155 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
156 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13,
157 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
158 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
159 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF,
160 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
161 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
162 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
163 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
164 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
165 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
166 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
167 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
168 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
169 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
170 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
171 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
172 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF,
173 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11,
174 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
175 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
176 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF,
177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
178 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
179 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
180 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11,
181 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11,
182 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
183 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
184 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13,
185 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
186 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
187 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
188 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11,
189 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
190 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
191 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13,
192 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
193 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
194 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
195 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
196 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11,
197 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
198 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
199 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
200 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11,
201 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
202 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
203 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
204 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11,
205 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
206 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
207 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13,
208 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11,
209 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
210 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
211 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
212 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
213 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF,
214 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
215 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
216 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
217 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
218 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
219 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
220 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF,
221 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
222 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
223 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
224 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
225 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
226 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
227 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
228 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15,
229 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
230 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
231 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
232 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15,
233 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
234 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
235 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
236 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF,
237 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
238 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
239 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
240 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF,
241 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
242 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
243 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
244 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
245 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
246 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
247 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
248 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15,
249 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
250 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
251 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
252 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
253 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
254 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
255 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15,
256 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
257 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11,
258 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
259 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
260 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15,
261 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11,
262 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
263 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
264 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15,
265 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
266 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
267 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
268 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15,
269 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
270 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
271 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
272 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15,
273 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
274 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
275 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
276 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
277 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11,
278 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
279 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
280 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15,
281 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
282 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
283 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
284 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11,
285 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
286 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
287 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15,
288 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15,
289 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
290 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
291 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
292 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
293 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
294 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
295 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15,
296 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
297 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
298 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
299 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
300 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF,
301 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13,
302 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
303 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
304 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF,
305 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
306 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
307 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
308 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13,
309 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13,
310 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
311 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
312 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15,
313 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
314 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
315 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
316 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13,
317 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
318 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
319 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15,
320 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
321 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
322 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
323 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
324 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13,
325 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
326 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
327 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
328 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13,
329 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
330 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
331 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
332 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13,
333 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
334 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
335 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15,
336 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13,
337 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
338 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
339 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15,
340 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
341 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13,
342 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
343 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
344 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15,
345 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
346 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
347 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
348 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13,
349 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
350 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
351 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15,
352 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15,
353 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
354 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
355 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
356 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
357 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
358 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
359 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,
360 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
361 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
362 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
363 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
364 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13,
365 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
366 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
367 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15,
368 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13,
369 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
370 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
371 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15,
372 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
373 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9,
374 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
375 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
376 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
377 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
378 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
379 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
380 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
381 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5,
382 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
383 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
384 12, 13, 14, 15};
385
386/**
387 * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
388 * Optimized by D. Lemire on May 3rd 2013
389 */
390int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
391 const uint16_t *__restrict__ B, size_t s_b,
392 uint16_t *C) {
393 size_t count = 0;
394 size_t i_a = 0, i_b = 0;
395 const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
396 const size_t st_a = (s_a / vectorlength) * vectorlength;
397 const size_t st_b = (s_b / vectorlength) * vectorlength;
398 __m128i v_a, v_b;
399 if ((i_a < st_a) && (i_b < st_b)) {
400 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
401 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
402 while ((A[i_a] == 0) || (B[i_b] == 0)) {
403 const __m128i res_v = _mm_cmpestrm(
404 v_b, vectorlength, v_a, vectorlength,
405 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
406 const int r = _mm_extract_epi32(res_v, 0);
407 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r);
408 __m128i p = _mm_shuffle_epi8(v_a, sm16);
409 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
410 count += _mm_popcnt_u32(r);
411 const uint16_t a_max = A[i_a + vectorlength - 1];
412 const uint16_t b_max = B[i_b + vectorlength - 1];
413 if (a_max <= b_max) {
414 i_a += vectorlength;
415 if (i_a == st_a) break;
416 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
417 }
418 if (b_max <= a_max) {
419 i_b += vectorlength;
420 if (i_b == st_b) break;
421 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
422 }
423 }
424 if ((i_a < st_a) && (i_b < st_b))
425 while (true) {
426 const __m128i res_v = _mm_cmpistrm(
427 v_b, v_a,
428 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
429 const int r = _mm_extract_epi32(res_v, 0);
430 __m128i sm16 =
431 _mm_load_si128((const __m128i *)shuffle_mask16 + r);
432 __m128i p = _mm_shuffle_epi8(v_a, sm16);
433 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
434 count += _mm_popcnt_u32(r);
435 const uint16_t a_max = A[i_a + vectorlength - 1];
436 const uint16_t b_max = B[i_b + vectorlength - 1];
437 if (a_max <= b_max) {
438 i_a += vectorlength;
439 if (i_a == st_a) break;
440 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
441 }
442 if (b_max <= a_max) {
443 i_b += vectorlength;
444 if (i_b == st_b) break;
445 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
446 }
447 }
448 }
449 // intersect the tail using scalar intersection
450 while (i_a < s_a && i_b < s_b) {
451 uint16_t a = A[i_a];
452 uint16_t b = B[i_b];
453 if (a < b) {
454 i_a++;
455 } else if (b < a) {
456 i_b++;
457 } else {
458 C[count] = a; //==b;
459 count++;
460 i_a++;
461 i_b++;
462 }
463 }
464 return (int32_t)count;
465}
466
467int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
468 size_t s_a,
469 const uint16_t *__restrict__ B,
470 size_t s_b) {
471 size_t count = 0;
472 size_t i_a = 0, i_b = 0;
473 const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
474 const size_t st_a = (s_a / vectorlength) * vectorlength;
475 const size_t st_b = (s_b / vectorlength) * vectorlength;
476 __m128i v_a, v_b;
477 if ((i_a < st_a) && (i_b < st_b)) {
478 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
479 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
480 while ((A[i_a] == 0) || (B[i_b] == 0)) {
481 const __m128i res_v = _mm_cmpestrm(
482 v_b, vectorlength, v_a, vectorlength,
483 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
484 const int r = _mm_extract_epi32(res_v, 0);
485 count += _mm_popcnt_u32(r);
486 const uint16_t a_max = A[i_a + vectorlength - 1];
487 const uint16_t b_max = B[i_b + vectorlength - 1];
488 if (a_max <= b_max) {
489 i_a += vectorlength;
490 if (i_a == st_a) break;
491 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
492 }
493 if (b_max <= a_max) {
494 i_b += vectorlength;
495 if (i_b == st_b) break;
496 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
497 }
498 }
499 if ((i_a < st_a) && (i_b < st_b))
500 while (true) {
501 const __m128i res_v = _mm_cmpistrm(
502 v_b, v_a,
503 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
504 const int r = _mm_extract_epi32(res_v, 0);
505 count += _mm_popcnt_u32(r);
506 const uint16_t a_max = A[i_a + vectorlength - 1];
507 const uint16_t b_max = B[i_b + vectorlength - 1];
508 if (a_max <= b_max) {
509 i_a += vectorlength;
510 if (i_a == st_a) break;
511 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
512 }
513 if (b_max <= a_max) {
514 i_b += vectorlength;
515 if (i_b == st_b) break;
516 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
517 }
518 }
519 }
520 // intersect the tail using scalar intersection
521 while (i_a < s_a && i_b < s_b) {
522 uint16_t a = A[i_a];
523 uint16_t b = B[i_b];
524 if (a < b) {
525 i_a++;
526 } else if (b < a) {
527 i_b++;
528 } else {
529 count++;
530 i_a++;
531 i_b++;
532 }
533 }
534 return (int32_t)count;
535}
536
537/////////
538// Warning:
539// This function may not be safe if A == C or B == C.
540/////////
541int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
542 const uint16_t *__restrict__ B, size_t s_b,
543 uint16_t *C) {
544 // we handle the degenerate case
545 if (s_a == 0) return 0;
546 if (s_b == 0) {
547 if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a);
548 return (int32_t)s_a;
549 }
550 // handle the leading zeroes, it is messy but it allows us to use the fast
551 // _mm_cmpistrm intrinsic safely
552 int32_t count = 0;
553 if ((A[0] == 0) || (B[0] == 0)) {
554 if ((A[0] == 0) && (B[0] == 0)) {
555 A++;
556 s_a--;
557 B++;
558 s_b--;
559 } else if (A[0] == 0) {
560 C[count++] = 0;
561 A++;
562 s_a--;
563 } else {
564 B++;
565 s_b--;
566 }
567 }
568 // at this point, we have two non-empty arrays, made of non-zero
569 // increasing values.
570 size_t i_a = 0, i_b = 0;
571 const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t);
572 const size_t st_a = (s_a / vectorlength) * vectorlength;
573 const size_t st_b = (s_b / vectorlength) * vectorlength;
574 if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path
575 __m128i v_a, v_b; //, v_bmax;
576 // we load a vector from A and a vector from B
577 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
578 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
579 // we have a runningmask which indicates which values from A have been
580 // spotted in B, these don't get written out.
581 __m128i runningmask_a_found_in_b = _mm_setzero_si128();
582 /****
583 * start of the main vectorized loop
584 *****/
585 while (true) {
586 // afoundinb will contain a mask indicate for each entry in A
587 // whether it is seen
588 // in B
589 const __m128i a_found_in_b =
590 _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
591 _SIDD_BIT_MASK);
592 runningmask_a_found_in_b =
593 _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
594 // we always compare the last values of A and B
595 const uint16_t a_max = A[i_a + vectorlength - 1];
596 const uint16_t b_max = B[i_b + vectorlength - 1];
597 if (a_max <= b_max) {
598 // Ok. In this code path, we are ready to write our v_a
599 // because there is no need to read more from B, they will
600 // all be large values.
601 const int bitmask_belongs_to_difference =
602 _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
603 /*** next few lines are probably expensive *****/
604 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
605 bitmask_belongs_to_difference);
606 __m128i p = _mm_shuffle_epi8(v_a, sm16);
607 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
608 count += _mm_popcnt_u32(bitmask_belongs_to_difference);
609 // we advance a
610 i_a += vectorlength;
611 if (i_a == st_a) // no more
612 break;
613 runningmask_a_found_in_b = _mm_setzero_si128();
614 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
615 }
616 if (b_max <= a_max) {
617 // in this code path, the current v_b has become useless
618 i_b += vectorlength;
619 if (i_b == st_b) break;
620 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
621 }
622 }
623 // at this point, either we have i_a == st_a, which is the end of the
624 // vectorized processing,
625 // or we have i_b == st_b, and we are not done processing the vector...
626 // so we need to finish it off.
627 if (i_a < st_a) { // we have unfinished business...
628 uint16_t buffer[8]; // buffer to do a masked load
629 memset(buffer, 0, 8 * sizeof(uint16_t));
630 memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));
631 v_b = _mm_lddqu_si128((__m128i *)buffer);
632 const __m128i a_found_in_b =
633 _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
634 _SIDD_BIT_MASK);
635 runningmask_a_found_in_b =
636 _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
637 const int bitmask_belongs_to_difference =
638 _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
639 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
640 bitmask_belongs_to_difference);
641 __m128i p = _mm_shuffle_epi8(v_a, sm16);
642 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
643 count += _mm_popcnt_u32(bitmask_belongs_to_difference);
644 i_a += vectorlength;
645 }
646 // at this point we should have i_a == st_a and i_b == st_b
647 }
648 // do the tail using scalar code
649 while (i_a < s_a && i_b < s_b) {
650 uint16_t a = A[i_a];
651 uint16_t b = B[i_b];
652 if (b < a) {
653 i_b++;
654 } else if (a < b) {
655 C[count] = a;
656 count++;
657 i_a++;
658 } else { //==
659 i_a++;
660 i_b++;
661 }
662 }
663 if (i_a < s_a) {
664 if(C == A) {
665 assert((size_t)count <= i_a);
666 if((size_t)count < i_a) {
667 memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
668 }
669 } else {
670 for(size_t i = 0; i < (s_a - i_a); i++) {
671 C[count + i] = A[i + i_a];
672 }
673 }
674 count += (int32_t)(s_a - i_a);
675 }
676 return count;
677}
678
679#endif // USESSE4
680
681
682
683#ifdef USE_OLD_SKEW_INTERSECT
684// TODO: given enough experience with the new skew intersect, drop the old one from the code base.
685
686
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements.
 *
 * small holds size_s values and large holds size_l values. The walk below is
 * a merge, so both are expected to be sorted in increasing order. buffer
 * receives at most one value per element of small.
 *
 * Positions in large are found with advanceUntil (defined elsewhere); this
 * function treats a return value equal to size_l as "large is exhausted".
 * NOTE(review): presumably advanceUntil returns the first index past idx_l
 * whose value is >= val_s -- confirm against its definition.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // An empty input gives an empty intersection. Checking both sizes here
    // also keeps the initial reads of small[0] and large[0] in bounds.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // large is behind: jump forward in large
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // small is behind: step to its next value
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // match: record it, then move both sides on
            buffer[pos++] = val_s;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
722#else // USE_OLD_SKEW_INTERSECT
723
724
725/**
726* Branchless binary search going after 4 values at once.
727* Assumes that array is sorted.
728* You have that array[*index1] >= target1, array[*index12] >= target2, ...
729* except when *index1 = n, in which case you know that all values in array are
730* smaller than target1, and so forth.
731* It has logarithmic complexity.
732*/
733static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
734 uint16_t target2, uint16_t target3, uint16_t target4,
735 int32_t *index1, int32_t *index2, int32_t *index3,
736 int32_t *index4) {
737 const uint16_t *base1 = array;
738 const uint16_t *base2 = array;
739 const uint16_t *base3 = array;
740 const uint16_t *base4 = array;
741 if (n == 0)
742 return;
743 while (n > 1) {
744 int32_t half = n >> 1;
745 base1 = (base1[half] < target1) ? &base1[half] : base1;
746 base2 = (base2[half] < target2) ? &base2[half] : base2;
747 base3 = (base3[half] < target3) ? &base3[half] : base3;
748 base4 = (base4[half] < target4) ? &base4[half] : base4;
749 n -= half;
750 }
751 *index1 = (int32_t)((*base1 < target1) + base1 - array);
752 *index2 = (int32_t)((*base2 < target2) + base2 - array);
753 *index3 = (int32_t)((*base3 < target3) + base3 - array);
754 *index4 = (int32_t)((*base4 < target4) + base4 - array);
755}
756
757/**
758* Branchless binary search going after 2 values at once.
759* Assumes that array is sorted.
760* You have that array[*index1] >= target1, array[*index12] >= target2.
761* except when *index1 = n, in which case you know that all values in array are
762* smaller than target1, and so forth.
763* It has logarithmic complexity.
764*/
765static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
766 uint16_t target2, int32_t *index1, int32_t *index2) {
767 const uint16_t *base1 = array;
768 const uint16_t *base2 = array;
769 if (n == 0)
770 return;
771 while (n > 1) {
772 int32_t half = n >> 1;
773 base1 = (base1[half] < target1) ? &base1[half] : base1;
774 base2 = (base2[half] < target2) ? &base2[half] : base2;
775 n -= half;
776 }
777 *index1 = (int32_t)((*base1 < target1) + base1 - array);
778 *index2 = (int32_t)((*base2 < target2) + base2 - array);
779}
780
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements.
 *
 * The small set is consumed in blocks of 4 values (binarySearch4), then at
 * most one block of 2 values (binarySearch2), then at most one single value
 * (binarySearch). After each block the search window in large is advanced
 * past the position found for the block's last value, so earlier parts of
 * large are never searched again; this relies on both inputs being sorted in
 * ascending order. This approach can be slightly superior to a conventional
 * galloping search in some instances.
 *
 *   small, size_s : smaller input and its element count
 *   large, size_l : larger input and its element count
 *   buffer        : receives the common values
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return 0;
    }
    int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
    // Blocks of four values from small.
    while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        uint16_t target3 = small[idx_s + 2];
        uint16_t target4 = small[idx_s + 3];
        binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, target3, target4, &index1, &index2, &index3,
                      &index4);
        // Each index is relative to large + idx_l; an index equal to the
        // window length means "not present", hence the bounds checks.
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
            buffer[pos++] = target3;
        }
        if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
            buffer[pos++] = target4;
        }
        idx_s += 4;
        idx_l += index4;
    }
    // At most one block of two values.
    if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, &index1, &index2);
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        idx_s += 2;
        idx_l += index2;
    }
    // At most one leftover value.
    if ((idx_s < size_s) && (idx_l < size_l)) {
        uint16_t val_s = small[idx_s];
        // NOTE(review): binarySearch is defined elsewhere; a non-negative
        // result is treated here as "found" -- confirm against its definition.
        int32_t index =
            binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
        if (index >= 0)
            buffer[pos++] = val_s;
    }
    return (int32_t)pos;
}
840
841
842#endif //USE_OLD_SKEW_INTERSECT
843
844
// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
/* Counts the values common to one small and one large set of uint16_t without
 * storing them. The forward-only walk finds every common value only when both
 * inputs are sorted in ascending order.
 *
 *   small, size_s : smaller input and its element count
 *   large, size_l : larger input and its element count
 *
 * Returns the number of common values; 0 if either input is empty. */
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
                                            size_t size_s,
                                            const uint16_t *large,
                                            size_t size_l) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // Both small[0] and large[0] are read unconditionally below, so neither
    // side may be empty past this point.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // NOTE(review): advanceUntil is defined elsewhere; it appears to
            // return the next index in large whose value is >= val_s, or
            // size_l when there is none -- confirm against its definition.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // Match: count it, then move both cursors forward.
            pos++;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
880
/* Tells whether one small and one large set of uint16_t share at least one
 * value, stopping at the first match. The forward-only walk is reliable only
 * when both inputs are sorted in ascending order.
 *
 *   small, size_s : smaller input and its element count
 *   large, size_l : larger input and its element count
 *
 * Returns true on the first common value, false otherwise (including when
 * either input is empty). */
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
                                      const uint16_t *large, size_t size_l) {
    size_t idx_l = 0, idx_s = 0;

    // Both small[0] and large[0] are read unconditionally below, so neither
    // side may be empty past this point.
    if (0 == size_s || 0 == size_l) {
        return false;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // NOTE(review): advanceUntil is defined elsewhere; it appears to
            // return the next index in large whose value is >= val_s, or
            // size_l when there is none -- confirm against its definition.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            return true;
        }
    }

    return false;
}
907
908/**
909 * Generic intersection function.
910 */
911int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
912 const uint16_t *B, const size_t lenB, uint16_t *out) {
913 const uint16_t *initout = out;
914 if (lenA == 0 || lenB == 0) return 0;
915 const uint16_t *endA = A + lenA;
916 const uint16_t *endB = B + lenB;
917
918 while (1) {
919 while (*A < *B) {
920 SKIP_FIRST_COMPARE:
921 if (++A == endA) return (int32_t)(out - initout);
922 }
923 while (*A > *B) {
924 if (++B == endB) return (int32_t)(out - initout);
925 }
926 if (*A == *B) {
927 *out++ = *A;
928 if (++A == endA || ++B == endB) return (int32_t)(out - initout);
929 } else {
930 goto SKIP_FIRST_COMPARE;
931 }
932 }
933 return (int32_t)(out - initout); // NOTREACHED
934}
935
/**
 * Counts the values common to two uint16_t arrays without storing them.
 *
 * Same forward two-cursor walk as intersect_uint16: the cursor on the smaller
 * value advances, equal values bump the count and advance both. The count is
 * complete only when both inputs are sorted in ascending order.
 *
 * Returns the number of common values (0 if either input is empty).
 */
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
                                     const uint16_t *B, const size_t lenB) {
    size_t cursor_a = 0;
    size_t cursor_b = 0;
    int32_t matches = 0;

    while (cursor_a < lenA && cursor_b < lenB) {
        const uint16_t left = A[cursor_a];
        const uint16_t right = B[cursor_b];
        if (left < right) {
            ++cursor_a;
        } else if (right < left) {
            ++cursor_b;
        } else {
            ++matches;
            ++cursor_a;
            ++cursor_b;
        }
    }
    return matches;
}
960
961
/**
 * Tells whether two uint16_t arrays share at least one value.
 *
 * Same forward two-cursor walk as intersect_uint16, but it stops at the first
 * equal pair. The answer is reliable only when both inputs are sorted in
 * ascending order.
 *
 * Returns true on the first common value, false otherwise (including when
 * either input is empty).
 */
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
                               const uint16_t *B, const size_t lenB) {
    size_t cursor_a = 0;
    size_t cursor_b = 0;

    while (cursor_a < lenA && cursor_b < lenB) {
        const uint16_t left = A[cursor_a];
        const uint16_t right = B[cursor_b];
        if (left == right) {
            return true;
        }
        if (left < right) {
            ++cursor_a;
        } else {
            ++cursor_b;
        }
    }
    return false;
}
984
985
986
987/**
988 * Generic intersection function.
989 */
990size_t intersection_uint32(const uint32_t *A, const size_t lenA,
991 const uint32_t *B, const size_t lenB,
992 uint32_t *out) {
993 const uint32_t *initout = out;
994 if (lenA == 0 || lenB == 0) return 0;
995 const uint32_t *endA = A + lenA;
996 const uint32_t *endB = B + lenB;
997
998 while (1) {
999 while (*A < *B) {
1000 SKIP_FIRST_COMPARE:
1001 if (++A == endA) return (out - initout);
1002 }
1003 while (*A > *B) {
1004 if (++B == endB) return (out - initout);
1005 }
1006 if (*A == *B) {
1007 *out++ = *A;
1008 if (++A == endA || ++B == endB) return (out - initout);
1009 } else {
1010 goto SKIP_FIRST_COMPARE;
1011 }
1012 }
1013 return (out - initout); // NOTREACHED
1014}
1015
/**
 * Counts the values common to two uint32_t arrays without storing them.
 *
 * Same forward two-cursor walk as intersection_uint32; the count is complete
 * only when both inputs are sorted in ascending order.
 *
 * Returns the number of common values (0 if either input is empty).
 */
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
                                const uint32_t *B, const size_t lenB) {
    size_t cursor_a = 0;
    size_t cursor_b = 0;
    size_t matches = 0;

    while (cursor_a < lenA && cursor_b < lenB) {
        const uint32_t left = A[cursor_a];
        const uint32_t right = B[cursor_b];
        if (left < right) {
            ++cursor_a;
        } else if (right < left) {
            ++cursor_b;
        } else {
            ++matches;
            ++cursor_a;
            ++cursor_b;
        }
    }
    return matches;
}
1040
1041// can one vectorize the computation of the union? (Update: Yes! See
1042// union_vector16).
1043
/* Merges two uint16_t sets into buffer, writing each value present in both
 * inputs only once. The output is sorted and duplicate-free only when both
 * inputs are sorted in ascending order.
 *
 *   set_1, size_1 : first input and its element count
 *   set_2, size_2 : second input and its element count
 *   buffer        : receives the merged values; the code writes up to
 *                   size_1 + size_2 elements
 *
 * Returns the number of values written to buffer. */
size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                    size_t size_2, uint16_t *buffer) {
    size_t pos = 0, idx_1 = 0, idx_2 = 0;

    // With one side empty the union is a plain copy of the other side.
    if (0 == size_2) {
        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
        return size_1;
    }
    if (0 == size_1) {
        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
        return size_2;
    }

    uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];

    while (true) {
        if (val_1 < val_2) {
            buffer[pos++] = val_1;
            ++idx_1;
            if (idx_1 >= size_1) break;
            val_1 = set_1[idx_1];
        } else if (val_2 < val_1) {
            buffer[pos++] = val_2;
            ++idx_2;
            if (idx_2 >= size_2) break;
            val_2 = set_2[idx_2];
        } else {
            // Present in both inputs: emit once, advance both.
            buffer[pos++] = val_1;
            ++idx_1;
            ++idx_2;
            if (idx_1 >= size_1 || idx_2 >= size_2) break;
            val_1 = set_1[idx_1];
            val_2 = set_2[idx_2];
        }
    }

    // Exactly one input may still have unread values; append them verbatim.
    if (idx_1 < size_1) {
        const size_t n_elems = size_1 - idx_1;
        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t));
        pos += n_elems;
    } else if (idx_2 < size_2) {
        const size_t n_elems = size_2 - idx_2;
        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t));
        pos += n_elems;
    }

    return pos;
}
1092
/* Writes to a_out the values of a1 that do not occur in a2 (a1 minus a2).
 * The forward-only walk is correct only when both inputs are sorted in
 * ascending order.
 *
 *   a1, length1 : minuend and its element count
 *   a2, length2 : subtrahend and its element count
 *   a_out       : receives the result; it may be the same pointer as a1,
 *                 since the write position never gets ahead of the read
 *                 position in a1
 *
 * Non-positive lengths are treated as empty inputs.
 * Returns the number of values written to a_out. */
int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
                      int length2, uint16_t *a_out) {
    int out_card = 0;
    int k1 = 0, k2 = 0;
    if (length1 <= 0) return 0;
    if (length2 <= 0) {
        // Nothing to remove: the result is a1 itself. memmove keeps this
        // safe even if a_out partially overlaps a1.
        if (a1 != a_out) {
            memmove(a_out, a1, sizeof(uint16_t) * (size_t)length1);
        }
        return length1;
    }
    uint16_t s1 = a1[k1];
    uint16_t s2 = a2[k2];
    while (true) {
        if (s1 < s2) {
            // s1 cannot appear later in a2: keep it.
            a_out[out_card++] = s1;
            ++k1;
            if (k1 >= length1) {
                break;
            }
            s1 = a1[k1];
        } else if (s1 == s2) {
            // s1 is removed by a2: skip it on both sides.
            ++k1;
            ++k2;
            if (k1 >= length1) {
                break;
            }
            if (k2 >= length2) {
                // a2 is exhausted: everything left in a1 survives.
                memmove(a_out + out_card, a1 + k1,
                        sizeof(uint16_t) * (size_t)(length1 - k1));
                return out_card + length1 - k1;
            }
            s1 = a1[k1];
            s2 = a2[k2];
        } else {  // s1 > s2
            ++k2;
            if (k2 >= length2) {
                // a2 is exhausted: everything left in a1 survives.
                memmove(a_out + out_card, a1 + k1,
                        sizeof(uint16_t) * (size_t)(length1 - k1));
                return out_card + length1 - k1;
            }
            s2 = a2[k2];
        }
    }
    return out_card;
}
1137
/* Writes to out the values that occur in exactly one of the two inputs
 * (symmetric difference). The forward-only walk is correct only when both
 * inputs are sorted in ascending order.
 *
 *   array_1, card_1 : first input and its element count
 *   array_2, card_2 : second input and its element count
 *   out             : receives the result; the code writes up to
 *                     card_1 + card_2 elements. The tails are copied with
 *                     memcpy, so out must not overlap either input.
 *
 * Returns the number of values written to out. */
int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
    int32_t pos1 = 0, pos2 = 0, pos_out = 0;
    while (pos1 < card_1 && pos2 < card_2) {
        const uint16_t v1 = array_1[pos1];
        const uint16_t v2 = array_2[pos2];
        if (v1 == v2) {
            // Present in both inputs: drop it.
            ++pos1;
            ++pos2;
            continue;
        }
        if (v1 < v2) {
            out[pos_out++] = v1;
            ++pos1;
        } else {
            out[pos_out++] = v2;
            ++pos2;
        }
    }
    // At most one input still has unread values; they all belong to the
    // result.
    if (pos1 < card_1) {
        const size_t n_elems = card_1 - pos1;
        memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t));
        pos_out += (int32_t)n_elems;
    } else if (pos2 < card_2) {
        const size_t n_elems = card_2 - pos2;
        memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t));
        pos_out += (int32_t)n_elems;
    }
    return pos_out;
}
1168
1169#ifdef USESSE4
1170
1171/***
1172 * start of the SIMD 16-bit union code
1173 *
1174 */
1175
// One step of the merge used by sse_merge: rotate the low vector by one
// 16-bit lane, then split the rotated vector and the high vector lane-wise
// into their unsigned minimum (new low) and maximum (new high).
static inline void sse_merge_step(__m128i *lo, __m128i *hi) {
    const __m128i rotated = _mm_alignr_epi8(*lo, *lo, 2);
    *lo = _mm_min_epu16(rotated, *hi);
    *hi = _mm_max_epu16(rotated, *hi);
}

// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going
// from vecMin all the way to vecMax.
// Developed originally for merge sort using SIMD instructions.
// Standard merge. See, e.g., Inoue and Taura, SIMD- and Cache-Friendly
// Algorithm for Sorting an Array of Structures.
//
// Both inputs are loaded before either output is written, so an output
// pointer may refer to the same vector as an input.
static inline void sse_merge(const __m128i *vInput1,
                             const __m128i *vInput2,  // input 1 & 2
                             __m128i *vecMin, __m128i *vecMax) {  // output
    const __m128i first = *vInput1;
    const __m128i second = *vInput2;

    // Initial lane-wise split of the two inputs.
    __m128i lo = _mm_min_epu16(first, second);
    __m128i hi = _mm_max_epu16(first, second);

    // Seven rotate-and-compare steps in total.
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);
    sse_merge_step(&lo, &hi);

    *vecMax = hi;
    // One final rotation of the low half before it is handed back.
    *vecMin = _mm_alignr_epi8(lo, lo, 2);
}
1210
1211// used by store_unique, generated by simdunion.py
1212static uint8_t uniqshuf[] = {
1213 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1214 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1215 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1216 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1217 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1218 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1219 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1220 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1221 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1222 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1223 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1224 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1225 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1226 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1227 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1228 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1229 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1230 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1231 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1232 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1233 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1234 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1235 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1236 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1237 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1238 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1239 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1240 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1241 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1242 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1243 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
1244 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1245 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1246 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
1247 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1248 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1249 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1250 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd,
1251 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
1252 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1253 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1254 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1255 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1256 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1257 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1258 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1259 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1260 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1261 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1262 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1263 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1264 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1265 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1266 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1267 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1268 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1269 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1270 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1271 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1272 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1273 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1274 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1275 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd,
1276 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1277 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf,
1278 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
1279 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1280 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1281 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1282 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd,
1283 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1284 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1285 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1286 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1287 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1288 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1289 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1290 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf,
1291 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd,
1292 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1293 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1294 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1295 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd,
1296 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1297 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1298 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1299 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1300 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1301 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
1302 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1303 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1304 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1305 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1306 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1307 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
1308 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1309 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
1310 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1311 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1312 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1313 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1314 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb,
1315 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
1316 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1317 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1318 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1319 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1320 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1321 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1322 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
1323 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1324 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1325 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1326 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf,
1327 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1328 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1329 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1330 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb,
1331 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1332 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1333 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1334 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1335 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1336 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1337 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1338 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1339 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf,
1340 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1341 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf,
1342 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1343 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1344 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1345 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1346 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1347 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1348 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1349 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1350 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
1351 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1352 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1353 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1354 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf,
1355 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1356 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1357 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1358 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
1359 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1360 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1361 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1362 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1363 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1364 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1365 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1366 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
1367 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1368 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1369 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1370 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
1371 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf,
1372 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1373 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1374 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF,
1375 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1376 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1377 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1378 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF,
1379 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf,
1380 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1381 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1382 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1383 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1384 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1385 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1386 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1387 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1388 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1389 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1390 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1391 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1392 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1393 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1394 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1395 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1396 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1397 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1398 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1399 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1400 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1401 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1402 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1403 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb,
1404 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1405 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1406 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
1407 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1408 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1409 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1410 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb,
1411 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1412 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1413 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1414 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1415 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1416 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1417 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1418 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
1419 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb,
1420 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1421 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1422 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1423 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb,
1424 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1425 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1426 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1427 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1428 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1429 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
1430 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1431 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1432 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1433 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1434 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1435 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
1436 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1437 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
1438 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd,
1439 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1440 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1441 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1442 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd,
1443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
1444 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1445 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1446 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1448 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1449 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1450 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
1451 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1452 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1453 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1454 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF,
1455 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1456 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1457 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1458 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd,
1459 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1460 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1461 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1462 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1463 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1464 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1465 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1466 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1467 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF,
1468 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1469 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1470 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1471 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1472 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1473 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1474 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1475 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1476 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1477 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1478 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
1479 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1480 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1481 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1482 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1483 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1484 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1485 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1486 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
1487 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1488 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1489 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1490 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1491 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1492 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1493 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1494 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
1495 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1496 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1497 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1498 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
1499 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
1500 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1501 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1502 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF,
1503 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1504 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1505 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1506 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF,
1507 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
1508 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1509 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1510 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1511 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1512 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1513 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1514 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1515 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1516 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1517 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1518 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF,
1519 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1520 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1521 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1522 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1523 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1524 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1525 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1526 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1527 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1528 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1529 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1530 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1531 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF,
1532 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1533 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF,
1534 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF,
1535 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1536 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1537 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1538 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF,
1539 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1540 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1541 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1542 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1543 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1544 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1545 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1546 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF,
1547 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF,
1548 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1549 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1550 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1551 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF,
1552 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1553 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1554 0xFF, 0xFF, 0xFF, 0xFF};
1555
1556// write vector new, while omitting repeated values assuming that previously
1557// written vector was "old"
1558static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
1559 __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2);
1560 // lots of high latency instructions follow (optimize?)
1561 int M = _mm_movemask_epi8(
1562 _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128()));
1563 int numberofnewvalues = 8 - _mm_popcnt_u32(M);
1564 __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
1565 __m128i val = _mm_shuffle_epi8(newval, key);
1566 _mm_storeu_si128((__m128i *)output, val);
1567 return numberofnewvalues;
1568}
1569
/**
 * Remove adjacent duplicates from `out[0..len)` in place (could be avoided?).
 *
 * The input is expected to be sorted so that equal values are adjacent.
 * The surviving values are compacted to the front of `out`.
 *
 * Returns the number of values kept; 0 when `len` is 0.
 */
static inline uint32_t unique(uint16_t *out, uint32_t len) {
    if (len == 0) {
        // nothing to keep: do not report out[0] as a valid element
        return 0;
    }
    uint32_t pos = 1;  // out[0] is always kept
    for (uint32_t i = 1; i < len; ++i) {
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        }
    }
    return pos;
}
1581
/**
 * Comparison callback for qsort() over uint16_t elements (could be avoided).
 *
 * Both operands are promoted to int before the subtraction, so the difference
 * always fits and cannot overflow.
 *
 * Returns a negative, zero or positive value when *a is smaller than, equal
 * to or greater than *b.
 */
static int uint16_compare(const void *a, const void *b) {
    return (*(const uint16_t *)a - *(const uint16_t *)b);
}
1586
1587// a one-pass SSE union algorithm
1588// This function may not be safe if array1 == output or array2 == output.
1589uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
1590 const uint16_t *__restrict__ array2, uint32_t length2,
1591 uint16_t *__restrict__ output) {
1592 if ((length1 < 8) || (length2 < 8)) {
1593 return (uint32_t)union_uint16(array1, length1, array2, length2, output);
1594 }
1595 __m128i vA, vB, V, vecMin, vecMax;
1596 __m128i laststore;
1597 uint16_t *initoutput = output;
1598 uint32_t len1 = length1 / 8;
1599 uint32_t len2 = length2 / 8;
1600 uint32_t pos1 = 0;
1601 uint32_t pos2 = 0;
1602 // we start the machine
1603 vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1604 pos1++;
1605 vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1606 pos2++;
1607 sse_merge(&vA, &vB, &vecMin, &vecMax);
1608 laststore = _mm_set1_epi16(-1);
1609 output += store_unique(laststore, vecMin, output);
1610 laststore = vecMin;
1611 if ((pos1 < len1) && (pos2 < len2)) {
1612 uint16_t curA, curB;
1613 curA = array1[8 * pos1];
1614 curB = array2[8 * pos2];
1615 while (true) {
1616 if (curA <= curB) {
1617 V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1618 pos1++;
1619 if (pos1 < len1) {
1620 curA = array1[8 * pos1];
1621 } else {
1622 break;
1623 }
1624 } else {
1625 V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1626 pos2++;
1627 if (pos2 < len2) {
1628 curB = array2[8 * pos2];
1629 } else {
1630 break;
1631 }
1632 }
1633 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1634 output += store_unique(laststore, vecMin, output);
1635 laststore = vecMin;
1636 }
1637 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1638 output += store_unique(laststore, vecMin, output);
1639 laststore = vecMin;
1640 }
1641 // we finish the rest off using a scalar algorithm
1642 // could be improved?
1643 //
1644 // copy the small end on a tmp buffer
1645 uint32_t len = (uint32_t)(output - initoutput);
1646 uint16_t buffer[16];
1647 uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
1648 if (pos1 == len1) {
1649 memcpy(buffer + leftoversize, array1 + 8 * pos1,
1650 (length1 - 8 * len1) * sizeof(uint16_t));
1651 leftoversize += length1 - 8 * len1;
1652 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1653
1654 leftoversize = unique(buffer, leftoversize);
1655 len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
1656 length2 - 8 * pos2, output);
1657 } else {
1658 memcpy(buffer + leftoversize, array2 + 8 * pos2,
1659 (length2 - 8 * len2) * sizeof(uint16_t));
1660 leftoversize += length2 - 8 * len2;
1661 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1662 leftoversize = unique(buffer, leftoversize);
1663 len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
1664 length1 - 8 * pos1, output);
1665 }
1666 return len;
1667}
1668
1669/**
1670 * End of the SIMD 16-bit union code
1671 *
1672 */
1673
1674/**
1675 * Start of SIMD 16-bit XOR code
1676 */
1677
1678// write vector new, while omitting repeated values assuming that previously
1679// written vector was "old"
1680static inline int store_unique_xor(__m128i old, __m128i newval,
1681 uint16_t *output) {
1682 __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4);
1683 __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2);
1684 __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1);
1685 __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval);
1686 __m128i equalleftoright = _mm_or_si128(equalleft, equalright);
1687 int M = _mm_movemask_epi8(
1688 _mm_packs_epi16(equalleftoright, _mm_setzero_si128()));
1689 int numberofnewvalues = 8 - _mm_popcnt_u32(M);
1690 __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
1691 __m128i val = _mm_shuffle_epi8(vecTmp2, key);
1692 _mm_storeu_si128((__m128i *)output, val);
1693 return numberofnewvalues;
1694}
1695
/**
 * In-place XOR-style de-duplication (could be avoided?): whenever an element
 * equals the one before it, the pair cancels and neither value is kept.
 * Survivors are compacted to the front of `out`.
 *
 * Warning: assumes len > 0, and is meant for sorted input in which a value
 * occurs at most twice.
 *
 * Returns the number of values kept.
 */
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
    uint32_t pos = 1;  // out[0] is tentatively kept
    for (uint32_t i = 1; i < len; ++i) {
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        } else {
            pos--;  // if it is identical to previous, delete it
        }
    }
    return pos;
}
1708
1709// a one-pass SSE xor algorithm
1710uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
1711 const uint16_t *__restrict__ array2, uint32_t length2,
1712 uint16_t *__restrict__ output) {
1713 if ((length1 < 8) || (length2 < 8)) {
1714 return xor_uint16(array1, length1, array2, length2, output);
1715 }
1716 __m128i vA, vB, V, vecMin, vecMax;
1717 __m128i laststore;
1718 uint16_t *initoutput = output;
1719 uint32_t len1 = length1 / 8;
1720 uint32_t len2 = length2 / 8;
1721 uint32_t pos1 = 0;
1722 uint32_t pos2 = 0;
1723 // we start the machine
1724 vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1725 pos1++;
1726 vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1727 pos2++;
1728 sse_merge(&vA, &vB, &vecMin, &vecMax);
1729 laststore = _mm_set1_epi16(-1);
1730 uint16_t buffer[17];
1731 output += store_unique_xor(laststore, vecMin, output);
1732
1733 laststore = vecMin;
1734 if ((pos1 < len1) && (pos2 < len2)) {
1735 uint16_t curA, curB;
1736 curA = array1[8 * pos1];
1737 curB = array2[8 * pos2];
1738 while (true) {
1739 if (curA <= curB) {
1740 V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1741 pos1++;
1742 if (pos1 < len1) {
1743 curA = array1[8 * pos1];
1744 } else {
1745 break;
1746 }
1747 } else {
1748 V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1749 pos2++;
1750 if (pos2 < len2) {
1751 curB = array2[8 * pos2];
1752 } else {
1753 break;
1754 }
1755 }
1756 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1757 // conditionally stores the last value of laststore as well as all
1758 // but the
1759 // last value of vecMin
1760 output += store_unique_xor(laststore, vecMin, output);
1761 laststore = vecMin;
1762 }
1763 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1764 // conditionally stores the last value of laststore as well as all but
1765 // the
1766 // last value of vecMin
1767 output += store_unique_xor(laststore, vecMin, output);
1768 laststore = vecMin;
1769 }
1770 uint32_t len = (uint32_t)(output - initoutput);
1771
1772 // we finish the rest off using a scalar algorithm
1773 // could be improved?
1774 // conditionally stores the last value of laststore as well as all but the
1775 // last value of vecMax,
1776 // we store to "buffer"
1777 int leftoversize = store_unique_xor(laststore, vecMax, buffer);
1778 uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
1779 uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
1780 if (vec7 != vec6) buffer[leftoversize++] = vec7;
1781 if (pos1 == len1) {
1782 memcpy(buffer + leftoversize, array1 + 8 * pos1,
1783 (length1 - 8 * len1) * sizeof(uint16_t));
1784 leftoversize += length1 - 8 * len1;
1785 if (leftoversize == 0) { // trivial case
1786 memcpy(output, array2 + 8 * pos2,
1787 (length2 - 8 * pos2) * sizeof(uint16_t));
1788 len += (length2 - 8 * pos2);
1789 } else {
1790 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1791 leftoversize = unique_xor(buffer, leftoversize);
1792 len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
1793 length2 - 8 * pos2, output);
1794 }
1795 } else {
1796 memcpy(buffer + leftoversize, array2 + 8 * pos2,
1797 (length2 - 8 * len2) * sizeof(uint16_t));
1798 leftoversize += length2 - 8 * len2;
1799 if (leftoversize == 0) { // trivial case
1800 memcpy(output, array1 + 8 * pos1,
1801 (length1 - 8 * pos1) * sizeof(uint16_t));
1802 len += (length1 - 8 * pos1);
1803 } else {
1804 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1805 leftoversize = unique_xor(buffer, leftoversize);
1806 len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
1807 length1 - 8 * pos1, output);
1808 }
1809 }
1810 return len;
1811}
1812
1813/**
1814 * End of SIMD 16-bit XOR code
1815 */
1816
1817#endif // USESSE4
1818
/**
 * Compute the union of two sorted arrays of 32-bit integers.
 *
 * set_1/size_1 and set_2/size_2 are the inputs; each is expected to be sorted
 * in increasing order without repeated values. The merged, de-duplicated
 * result is written to `buffer`, which must have room for size_1 + size_2
 * elements in the worst case.
 *
 * Returns the number of elements written to `buffer`.
 */
size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
                    size_t size_2, uint32_t *buffer) {
    size_t pos = 0, idx_1 = 0, idx_2 = 0;

    // an empty side makes the union a plain copy of the other side;
    // memmove tolerates buffer overlapping the source
    if (0 == size_2) {
        memmove(buffer, set_1, size_1 * sizeof(uint32_t));
        return size_1;
    }
    if (0 == size_1) {
        memmove(buffer, set_2, size_2 * sizeof(uint32_t));
        return size_2;
    }

    uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];

    // classic two-pointer merge; stops as soon as either input is exhausted
    while (true) {
        if (val_1 < val_2) {
            buffer[pos++] = val_1;
            ++idx_1;
            if (idx_1 >= size_1) break;
            val_1 = set_1[idx_1];
        } else if (val_2 < val_1) {
            buffer[pos++] = val_2;
            ++idx_2;
            if (idx_2 >= size_2) break;
            val_2 = set_2[idx_2];
        } else {
            // value present in both inputs: emit it once, advance both
            buffer[pos++] = val_1;
            ++idx_1;
            ++idx_2;
            if (idx_1 >= size_1 || idx_2 >= size_2) break;
            val_1 = set_1[idx_1];
            val_2 = set_2[idx_2];
        }
    }

    // at most one input still has elements; append them unchanged
    if (idx_1 < size_1) {
        const size_t n_elems = size_1 - idx_1;
        memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t));
        pos += n_elems;
    } else if (idx_2 < size_2) {
        const size_t n_elems = size_2 - idx_2;
        memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t));
        pos += n_elems;
    }

    return pos;
}
1867
/**
 * Compute the cardinality of the union of two sorted arrays of 32-bit
 * integers without materializing the union.
 *
 * set_1/size_1 and set_2/size_2 are the inputs; each is expected to be sorted
 * in increasing order without repeated values.
 *
 * Returns the number of distinct values found across both inputs.
 */
size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
                         const uint32_t *set_2, size_t size_2) {
    size_t pos = 0, idx_1 = 0, idx_2 = 0;

    // an empty side contributes nothing
    if (0 == size_2) {
        return size_1;
    }
    if (0 == size_1) {
        return size_2;
    }

    uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];

    // two-pointer walk; every step accounts for exactly one distinct value
    while (true) {
        if (val_1 < val_2) {
            ++idx_1;
            ++pos;
            if (idx_1 >= size_1) break;
            val_1 = set_1[idx_1];
        } else if (val_2 < val_1) {
            ++idx_2;
            ++pos;
            if (idx_2 >= size_2) break;
            val_2 = set_2[idx_2];
        } else {
            // shared value: counted once, consumed from both inputs
            ++idx_1;
            ++idx_2;
            ++pos;
            if (idx_1 >= size_1 || idx_2 >= size_2) break;
            val_1 = set_1[idx_1];
            val_2 = set_2[idx_2];
        }
    }

    // at most one input still has elements; each of them is distinct
    if (idx_1 < size_1) {
        pos += size_1 - idx_1;
    } else if (idx_2 < size_2) {
        pos += size_2 - idx_2;
    }
    return pos;
}
1911
1912
1913
/**
 * Compute the union of two arrays of 16-bit integers into `buffer`, using the
 * vectorized union_vector16() when ROARING_VECTOR_OPERATIONS_ENABLED is
 * defined and the scalar union_uint16() otherwise.
 *
 * In both cases the shorter input is passed as the first array (when the
 * sizes are equal, set_2 goes first).
 *
 * Returns the value reported by the underlying union routine.
 */
size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                         size_t size_2, uint16_t *buffer) {
#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
    // compute union with smallest array first
    if (size_1 < size_2) {
        return union_vector16(set_1, (uint32_t)size_1,
                              set_2, (uint32_t)size_2, buffer);
    } else {
        return union_vector16(set_2, (uint32_t)size_2,
                              set_1, (uint32_t)size_1, buffer);
    }
#else
    // compute union with smallest array first
    if (size_1 < size_2) {
        return union_uint16(
            set_1, size_1, set_2, size_2, buffer);
    } else {
        return union_uint16(
            set_2, size_2, set_1, size_1, buffer);
    }
#endif
}
1936
/**
 * Report whether the first `n` bytes at `s1` and `s2` are identical.
 *
 * When `n` is 0 the function returns true without touching either pointer.
 * With USEAVX defined the comparison runs in 32-byte AVX2 chunks, then 8-byte
 * words, then single bytes; otherwise it defers to memcmp().
 */
bool memequals(const void *s1, const void *s2, size_t n) {
    if (n == 0) {
        return true;
    }
#ifdef USEAVX
    const uint8_t *ptr1 = (const uint8_t *)s1;
    const uint8_t *ptr2 = (const uint8_t *)s2;
    const uint8_t *end1 = ptr1 + n;
    // ends of the regions that can be covered by whole 8- and 32-byte steps
    const uint8_t *end8 = ptr1 + n / 8 * 8;
    const uint8_t *end32 = ptr1 + n / 32 * 32;

    while (ptr1 < end32) {
        __m256i r1 = _mm256_loadu_si256((const __m256i *)ptr1);
        __m256i r2 = _mm256_loadu_si256((const __m256i *)ptr2);
        // one mask bit per byte; all 32 bits set means the chunks match
        int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
        if ((uint32_t)mask != UINT32_MAX) {
            return false;
        }
        ptr1 += 32;
        ptr2 += 32;
    }

    while (ptr1 < end8) {
        // memcpy instead of a pointer cast: the inputs carry no alignment or
        // effective-type guarantee for a direct uint64_t load
        uint64_t v1, v2;
        memcpy(&v1, ptr1, sizeof(v1));
        memcpy(&v2, ptr2, sizeof(v2));
        if (v1 != v2) {
            return false;
        }
        ptr1 += 8;
        ptr2 += 8;
    }

    while (ptr1 < end1) {
        if (*ptr1 != *ptr2) {
            return false;
        }
        ptr1++;
        ptr2++;
    }

    return true;
#else
    return memcmp(s1, s2, n) == 0;
#endif
}
1982/* end file src/array_util.c */
1983/* begin file src/bitset_util.c */
1984#include <assert.h>
1985#include <stdint.h>
1986#include <stdio.h>
1987#include <stdlib.h>
1988#include <string.h>
1989
1990
1991#ifdef IS_X64
/* lengthTable[b] is the number of bits set in the byte value b (0..255). */
static uint8_t lengthTable[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
2004#endif
2005
2006#ifdef USEAVX
2007ALIGNED(32)
2008static uint32_t vecDecodeTable[256][8] = {
2009 {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
2010 {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
2011 {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
2012 {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
2013 {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
2014 {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
2015 {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
2016 {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
2017 {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
2018 {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
2019 {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
2020 {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
2021 {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
2022 {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
2023 {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
2024 {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
2025 {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
2026 {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
2027 {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
2028 {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
2029 {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
2030 {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
2031 {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
2032 {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
2033 {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
2034 {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
2035 {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
2036 {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
2037 {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
2038 {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
2039 {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
2040 {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
2041 {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
2042 {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
2043 {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
2044 {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
2045 {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
2046 {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
2047 {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
2048 {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
2049 {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
2050 {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
2051 {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
2052 {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
2053 {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
2054 {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
2055 {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
2056 {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
2057 {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
2058 {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
2059 {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
2060 {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
2061 {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
2062 {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
2063 {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
2064 {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
2065 {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
2066 {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
2067 {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
2068 {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
2069 {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
2070 {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
2071 {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
2072 {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
2073 {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
2074 {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
2075 {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
2076 {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
2077 {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
2078 {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
2079 {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
2080 {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
2081 {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
2082 {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
2083 {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
2084 {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
2085 {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
2086 {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
2087 {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
2088 {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
2089 {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
2090 {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
2091 {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
2092 {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
2093 {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
2094 {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
2095 {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
2096 {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
2097 {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
2098 {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
2099 {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
2100 {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
2101 {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
2102 {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
2103 {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
2104 {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
2105 {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
2106 {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
2107 {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
2108 {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
2109 {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
2110 {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
2111 {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
2112 {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
2113 {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
2114 {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
2115 {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
2116 {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
2117 {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
2118 {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
2119 {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
2120 {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
2121 {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
2122 {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
2123 {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
2124 {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
2125 {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
2126 {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
2127 {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
2128 {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
2129 {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
2130 {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
2131 {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
2132 {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
2133 {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
2134 {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
2135 {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
2136 {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
2137 {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
2138 {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
2139 {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
2140 {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
2141 {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
2142 {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
2143 {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
2144 {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
2145 {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
2146 {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
2147 {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
2148 {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
2149 {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
2150 {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
2151 {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
2152 {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
2153 {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
2154 {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
2155 {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
2156 {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
2157 {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
2158 {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
2159 {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
2160 {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
2161 {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
2162 {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
2163 {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
2164 {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
2165 {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
2166 {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
2167 {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
2168 {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
2169 {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
2170 {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
2171 {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
2172 {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
2173 {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
2174 {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
2175 {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
2176 {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
2177 {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
2178 {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
2179 {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
2180 {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
2181 {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
2182 {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
2183 {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
2184 {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
2185 {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
2186 {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
2187 {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
2188 {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
2189 {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
2190 {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
2191 {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
2192 {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
2193 {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
2194 {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
2195 {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
2196 {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
2197 {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
2198 {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
2199 {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
2200 {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
2201 {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
2202 {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
2203 {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
2204 {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
2205 {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
2206 {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
2207 {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
2208 {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
2209 {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
2210 {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
2211 {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
2212 {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
2213 {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
2214 {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
2215 {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
2216 {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
2217 {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
2218 {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
2219 {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
2220 {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
2221 {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
2222 {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
2223 {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
2224 {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
2225 {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
2226 {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
2227 {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
2228 {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
2229 {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
2230 {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
2231 {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
2232 {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
2233 {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
2234 {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
2235 {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
2236 {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
2237 {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
2238 {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
2239 {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
2240 {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
2241 {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
2242 {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
2243 {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
2244 {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
2245 {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
2246 {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
2247 {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
2248 {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
2249 {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
2250 {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
2251 {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
2252 {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
2253 {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
2254 {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
2255 {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
2256 {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
2257 {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
2258 {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
2259 {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
2260 {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
2261 {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
2262 {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
2263 {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
2264 {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
2265};
2266
2267#endif // #ifdef USEAVX
2268
2269#ifdef IS_X64
2270// same as vecDecodeTable but in 16 bits
2271ALIGNED(32)
2272static uint16_t vecDecodeTable_uint16[256][8] = {
2273 {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
2274 {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
2275 {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
2276 {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
2277 {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
2278 {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
2279 {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
2280 {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
2281 {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
2282 {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
2283 {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
2284 {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
2285 {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
2286 {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
2287 {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
2288 {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
2289 {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
2290 {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
2291 {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
2292 {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
2293 {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
2294 {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
2295 {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
2296 {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
2297 {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
2298 {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
2299 {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
2300 {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
2301 {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
2302 {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
2303 {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
2304 {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
2305 {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
2306 {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
2307 {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
2308 {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
2309 {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
2310 {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
2311 {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
2312 {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
2313 {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
2314 {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
2315 {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
2316 {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
2317 {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
2318 {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
2319 {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
2320 {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
2321 {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
2322 {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
2323 {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
2324 {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
2325 {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
2326 {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
2327 {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
2328 {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
2329 {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
2330 {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
2331 {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
2332 {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
2333 {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
2334 {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
2335 {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
2336 {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
2337 {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
2338 {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
2339 {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
2340 {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
2341 {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
2342 {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
2343 {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
2344 {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
2345 {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
2346 {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
2347 {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
2348 {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
2349 {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
2350 {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
2351 {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
2352 {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
2353 {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
2354 {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
2355 {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
2356 {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
2357 {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
2358 {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
2359 {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
2360 {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
2361 {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
2362 {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
2363 {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
2364 {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
2365 {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
2366 {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
2367 {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
2368 {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
2369 {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
2370 {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
2371 {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
2372 {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
2373 {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
2374 {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
2375 {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
2376 {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
2377 {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
2378 {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
2379 {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
2380 {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
2381 {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
2382 {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
2383 {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
2384 {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
2385 {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
2386 {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
2387 {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
2388 {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
2389 {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
2390 {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
2391 {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
2392 {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
2393 {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
2394 {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
2395 {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
2396 {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
2397 {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
2398 {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
2399 {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
2400 {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
2401 {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
2402 {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
2403 {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
2404 {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
2405 {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
2406 {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
2407 {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
2408 {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
2409 {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
2410 {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
2411 {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
2412 {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
2413 {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
2414 {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
2415 {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
2416 {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
2417 {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
2418 {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
2419 {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
2420 {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
2421 {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
2422 {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
2423 {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
2424 {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
2425 {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
2426 {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
2427 {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
2428 {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
2429 {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
2430 {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
2431 {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
2432 {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
2433 {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
2434 {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
2435 {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
2436 {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
2437 {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
2438 {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
2439 {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
2440 {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
2441 {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
2442 {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
2443 {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
2444 {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
2445 {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
2446 {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
2447 {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
2448 {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
2449 {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
2450 {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
2451 {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
2452 {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
2453 {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
2454 {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
2455 {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
2456 {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
2457 {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
2458 {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
2459 {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
2460 {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
2461 {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
2462 {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
2463 {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
2464 {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
2465 {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
2466 {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
2467 {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
2468 {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
2469 {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
2470 {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
2471 {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
2472 {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
2473 {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
2474 {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
2475 {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
2476 {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
2477 {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
2478 {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
2479 {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
2480 {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
2481 {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
2482 {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
2483 {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
2484 {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
2485 {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
2486 {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
2487 {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
2488 {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
2489 {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
2490 {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
2491 {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
2492 {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
2493 {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
2494 {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
2495 {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
2496 {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
2497 {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
2498 {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
2499 {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
2500 {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
2501 {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
2502 {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
2503 {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
2504 {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
2505 {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
2506 {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
2507 {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
2508 {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
2509 {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
2510 {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
2511 {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
2512 {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
2513 {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
2514 {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
2515 {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
2516 {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
2517 {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
2518 {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
2519 {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
2520 {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
2521 {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
2522 {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
2523 {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
2524 {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
2525 {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
2526 {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
2527 {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
2528 {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
2529};
2530
2531#endif
2532
2533#ifdef USEAVX
2534
2535size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout,
2536 size_t outcapacity, uint32_t base) {
2537 uint32_t *out = (uint32_t *)vout;
2538 uint32_t *initout = out;
2539 __m256i baseVec = _mm256_set1_epi32(base - 1);
2540 __m256i incVec = _mm256_set1_epi32(64);
2541 __m256i add8 = _mm256_set1_epi32(8);
2542 uint32_t *safeout = out + outcapacity;
2543 size_t i = 0;
2544 for (; (i < length) && (out + 64 <= safeout); ++i) {
2545 uint64_t w = array[i];
2546 if (w == 0) {
2547 baseVec = _mm256_add_epi32(baseVec, incVec);
2548 } else {
2549 for (int k = 0; k < 4; ++k) {
2550 uint8_t byteA = (uint8_t)w;
2551 uint8_t byteB = (uint8_t)(w >> 8);
2552 w >>= 16;
2553 __m256i vecA =
2554 _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]);
2555 __m256i vecB =
2556 _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]);
2557 uint8_t advanceA = lengthTable[byteA];
2558 uint8_t advanceB = lengthTable[byteB];
2559 vecA = _mm256_add_epi32(baseVec, vecA);
2560 baseVec = _mm256_add_epi32(baseVec, add8);
2561 vecB = _mm256_add_epi32(baseVec, vecB);
2562 baseVec = _mm256_add_epi32(baseVec, add8);
2563 _mm256_storeu_si256((__m256i *)out, vecA);
2564 out += advanceA;
2565 _mm256_storeu_si256((__m256i *)out, vecB);
2566 out += advanceB;
2567 }
2568 }
2569 }
2570 base += i * 64;
2571 for (; (i < length) && (out < safeout); ++i) {
2572 uint64_t w = array[i];
2573 while ((w != 0) && (out < safeout)) {
2574 uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
2575 int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
2576 uint32_t val = r + base;
2577 memcpy(out, &val,
2578 sizeof(uint32_t)); // should be compiled as a MOV on x64
2579 out++;
2580 w ^= t;
2581 }
2582 base += 64;
2583 }
2584 return out - initout;
2585}
2586#endif // USEAVX
2587
/*
 * Scalar decoder: writes the positions of the bits set in the "length" 64-bit
 * words of "bitset" to "vout" as 32-bit integers; bit k of word i is reported
 * as base + 64 * i + k.
 *
 * No capacity is passed in: "vout" must have room for one uint32_t per set
 * bit.
 *
 * Returns the number of values written.
 */
size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout,
                              uint32_t base) {
    size_t outpos = 0;  // same type as the return value
    uint32_t *out = (uint32_t *)vout;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);   // isolate the lowest set bit
            int r = __builtin_ctzll(w);  // index of that bit
            uint32_t val = r + base;
            // memcpy keeps the store valid even if vout is not aligned
            memcpy(out + outpos, &val, sizeof(uint32_t));
            outpos++;
            w ^= t;  // clear the bit just reported
        }
        base += 64;
    }
    return outpos;
}
2607
/*
 * Writes to "out", as 16-bit integers, the positions of the bits that are set
 * in both "bitset1" and "bitset2" (each "length" 64-bit words long); bit k of
 * word i is reported as base + 64 * i + k, truncated to 16 bits.
 *
 * "out" must have room for every common bit. Returns the number of values
 * written.
 */
size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1,
                                                  const uint64_t * __restrict__ bitset2,
                                                  size_t length, uint16_t *out,
                                                  uint16_t base) {
    uint16_t *cursor = out;
    for (size_t word = 0; word < length; ++word, base += 64) {
        // "bits &= bits - 1" drops the lowest set bit after it is reported
        for (uint64_t bits = bitset1[word] & bitset2[word]; bits != 0;
             bits &= bits - 1) {
            *cursor = __builtin_ctzll(bits) + base;
            cursor++;
        }
    }
    return cursor - out;
}
2625
2626#ifdef IS_X64
2627/*
2628 * Given a bitset containing "length" 64-bit words, write out the position
2629 * of all the set bits to "out" as 16-bit integers, values start at "base" (can
2630 *be set to zero).
2631 *
2632 * The "out" pointer should be sufficient to store the actual number of bits
2633 *set.
2634 *
2635 * Returns how many values were actually decoded.
2636 *
2637 * This function uses SSE decoding.
2638 */
2639size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length,
2640 uint16_t *out, size_t outcapacity,
2641 uint16_t base) {
2642 uint16_t *initout = out;
2643 __m128i baseVec = _mm_set1_epi16(w: base - 1);
2644 __m128i incVec = _mm_set1_epi16(w: 64);
2645 __m128i add8 = _mm_set1_epi16(w: 8);
2646 uint16_t *safeout = out + outcapacity;
2647 const int numberofbytes = 2; // process two bytes at a time
2648 size_t i = 0;
2649 for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) {
2650 uint64_t w = bitset[i];
2651 if (w == 0) {
2652 baseVec = _mm_add_epi16(a: baseVec, b: incVec);
2653 } else {
2654 for (int k = 0; k < 4; ++k) {
2655 uint8_t byteA = (uint8_t)w;
2656 uint8_t byteB = (uint8_t)(w >> 8);
2657 w >>= 16;
2658 __m128i vecA = _mm_load_si128(
2659 p: (const __m128i *)vecDecodeTable_uint16[byteA]);
2660 __m128i vecB = _mm_load_si128(
2661 p: (const __m128i *)vecDecodeTable_uint16[byteB]);
2662 uint8_t advanceA = lengthTable[byteA];
2663 uint8_t advanceB = lengthTable[byteB];
2664 vecA = _mm_add_epi16(a: baseVec, b: vecA);
2665 baseVec = _mm_add_epi16(a: baseVec, b: add8);
2666 vecB = _mm_add_epi16(a: baseVec, b: vecB);
2667 baseVec = _mm_add_epi16(a: baseVec, b: add8);
2668 _mm_storeu_si128(p: (__m128i *)out, b: vecA);
2669 out += advanceA;
2670 _mm_storeu_si128(p: (__m128i *)out, b: vecB);
2671 out += advanceB;
2672 }
2673 }
2674 }
2675 base += (uint16_t)(i * 64);
2676 for (; (i < length) && (out < safeout); ++i) {
2677 uint64_t w = bitset[i];
2678 while ((w != 0) && (out < safeout)) {
2679 uint64_t t = w & (~w + 1);
2680 int r = __builtin_ctzll(w);
2681 *out = r + base;
2682 out++;
2683 w ^= t;
2684 }
2685 base += 64;
2686 }
2687 return out - initout;
2688}
2689#endif
2690
/*
 * Scalar decoder for 16-bit output: for a bitset of "length" 64-bit words,
 * stores the position of every set bit in "out"; bit k of word i is reported
 * as base + 64 * i + k, truncated to 16 bits.
 *
 * "out" must be large enough to hold one value per set bit.
 *
 * Returns the number of values stored.
 */
size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length,
                                     uint16_t *out, uint16_t base) {
    uint16_t *cursor = out;
    for (size_t word = 0; word < length; ++word, base += 64) {
        // "bits &= bits - 1" drops the lowest set bit after it is reported
        for (uint64_t bits = bitset[word]; bits != 0; bits &= bits - 1) {
            *cursor = __builtin_ctzll(bits) + base;
            cursor++;
        }
    }
    return cursor - out;
}
2715
2716#if defined(ASMBITMANIPOPTIMIZATION)
2717
/*
 * Sets, in "bitset", the bit named by each of the "length" values in "list"
 * and returns "card" increased by the number of bits that were not already
 * set.
 *
 * x86-64 inline-assembly version (needs BMI2 for shrx).
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;
    const uint16_t *end = list + length;
    if (!length) return card;  // the asm loop below always runs at least once
    // TODO: could unroll for performance, see bitset_set_list
    // bts is not available as an intrinsic in GCC
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"               // pos = *list
        "shrx %[shift], %[pos], %[offset]\n"       // offset = pos >> 6
        "mov (%[bitset],%[offset],8), %[load]\n"   // load the 64-bit word
        "bts %[pos], %[load]\n"                    // set bit, CF = old bit
        "mov %[load], (%[bitset],%[offset],8)\n"   // store the word back
        "sbb $-1, %[card]\n"                       // card += 1 - CF
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
          [pos] "=&r"(pos), [offset] "=&r"(offset)
        : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)
        :
        /* clobbers: the asm writes through bitset, which is only passed as a
           register input, so the compiler must be told memory changes */
        "memory", "cc");
    return card;
}
2742
/*
 * Sets, in "bitset", the bit named by each of the "length" values in "list".
 *
 * x86-64 inline-assembly version (needs BMI2 for shrx), unrolled four values
 * at a time. Every asm statement stores through "bitset", which is only
 * passed as a register input, so each one declares a "memory" clobber.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t pos;
    const uint16_t *end = list + length;

    uint64_t shift = 6;
    uint64_t offset;
    uint64_t load;
    // "end - list >= 4" avoids forming a pointer beyond the end of the list
    for (; end - list >= 4; list += 4) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "memory", "cc");
        pos = list[1];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "memory", "cc");
        pos = list[2];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "memory", "cc");
        pos = list[3];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "memory", "cc");
    }

    // leftover values (fewer than four)
    while (list != end) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "memory", "cc");
        list++;
    }
}
2797
/*
 * Clears, in "bitset", the bit named by each of the "length" values in
 * "list" and returns "card" decreased by the number of bits that were
 * actually set before being cleared.
 *
 * x86-64 inline-assembly version of the portable implementation in the
 * #else branch below.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;  // log2(64): converts a bit position to a word index
    const uint16_t *end = list + length;
    // the asm loop tests its exit condition at the bottom, so it must not be
    // entered with an empty list
    if (!length) return card;
    // btr is not available as an intrinsic in GCC
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"               // pos = *list
        "shrx %[shift], %[pos], %[offset]\n"       // offset = pos >> 6
        "mov (%[bitset],%[offset],8), %[load]\n"   // load the 64-bit word
        "btr %[pos], %[load]\n"                    // clear bit, CF = old bit
        "mov %[load], (%[bitset],%[offset],8)\n"   // store the word back
        "sbb $0, %[card]\n"                        // card -= CF
        "add $2, %[list]\n"                        // next 16-bit value
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
          [pos] "=&r"(pos), [offset] "=&r"(offset)
        : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)
        :
        /* clobbers */ "memory");
    return card;
}
2823
2824#else
/*
 * Clears, in "bitset" (viewed as an array of 64-bit words), the bit named by
 * each of the "length" values in "list".
 *
 * Returns "card" minus the number of bits that were set before being cleared.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t value = list[k];
        const uint64_t bit = value % 64;
        uint64_t *slot = &words[value >> 6];
        const uint64_t before = *slot;
        const uint64_t after = before & ~(UINT64_C(1) << bit);
        // (before ^ after) has at most the target bit set: 1 if it changed
        card -= (before ^ after) >> bit;
        *slot = after;
    }
    return card;
}
2841
/*
 * Sets, in "bitset" (viewed as an array of 64-bit words), the bit named by
 * each of the "length" values in "list".
 *
 * Returns "card" plus the number of bits that were not already set.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t value = list[k];
        const uint64_t bit = value % 64;
        uint64_t *slot = &words[value >> 6];
        const uint64_t before = *slot;
        const uint64_t after = before | (UINT64_C(1) << bit);
        // (before ^ after) has at most the target bit set: 1 if it changed
        card += (before ^ after) >> bit;
        *slot = after;
    }
    return card;
}
2858
/*
 * Sets, in "bitset" (viewed as an array of 64-bit words), the bit named by
 * each of the "length" values in "list". No cardinality is tracked.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t value = list[k];
        // word index is value / 64, bit index is value % 64
        words[value >> 6] |= UINT64_C(1) << (value % 64);
    }
}
2872
2873#endif
2874
2875/* flip specified bits */
2876/* TODO: consider whether worthwhile to make an asm version */
2877
/*
 * Flips, in "bitset" (viewed as an array of 64-bit words), the bit named by
 * each of the "length" values in "list".
 *
 * Returns the updated cardinality: "card" plus one for every bit that went
 * from 0 to 1 and minus one for every bit that went from 1 to 0.
 */
uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card,
                                   const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t value = list[k];
        const uint64_t bit = value % 64;
        uint64_t *slot = &words[value >> 6];
        const uint64_t before = *slot;
        const uint64_t was_set = (before >> bit) & UINT64_C(1);
        // branch-free +1 / -1: unsigned wrap-around turns 1 - 2 into "minus 1"
        card += 1 - 2 * was_set;
        *slot = before ^ (UINT64_C(1) << bit);
    }
    return card;
}
2896
/*
 * Flips, in "bitset" (viewed as an array of 64-bit words), the bit named by
 * each of the "length" values in "list". No cardinality is tracked.
 */
void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t k = 0; k < length; ++k) {
        const uint64_t value = list[k];
        // word index is value / 64, bit index is value % 64
        words[value >> 6] ^= UINT64_C(1) << (value % 64);
    }
}
2910/* end file src/bitset_util.c */
2911/* begin file src/containers/array.c */
2912/*
2913 * array.c
2914 *
2915 */
2916
2917#include <assert.h>
2918#include <stdio.h>
2919#include <stdlib.h>
2920
2921/* Create a new array with capacity size. Return NULL in case of failure. */
2922array_container_t *array_container_create_given_capacity(int32_t size) {
2923 array_container_t *container;
2924
2925 container = (array_container_t *)malloc(size: sizeof(array_container_t));
2926 assert (container);
2927
2928 if( size <= 0 ) { // we don't want to rely on malloc(0)
2929 container->array = NULL;
2930 } else {
2931 container->array = (uint16_t *)malloc(size: sizeof(uint16_t) * size);
2932 assert (container->array);
2933 }
2934
2935 container->capacity = size;
2936 container->cardinality = 0;
2937
2938 return container;
2939}
2940
2941/* Create a new array. Return NULL in case of failure. */
2942array_container_t *array_container_create(void) {
2943 return array_container_create_given_capacity(size: ARRAY_DEFAULT_INIT_SIZE);
2944}
2945
2946/* Create a new array containing all values in [min,max). */
2947array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
2948 array_container_t * answer = array_container_create_given_capacity(size: max - min + 1);
2949 if(answer == NULL) return answer;
2950 answer->cardinality = 0;
2951 for(uint32_t k = min; k < max; k++) {
2952 answer->array[answer->cardinality++] = k;
2953 }
2954 return answer;
2955}
2956
2957/* Duplicate container */
2958array_container_t *array_container_clone(const array_container_t *src) {
2959 array_container_t *newcontainer =
2960 array_container_create_given_capacity(size: src->capacity);
2961 if (newcontainer == NULL) return NULL;
2962
2963 newcontainer->cardinality = src->cardinality;
2964
2965 memcpy(dest: newcontainer->array, src: src->array,
2966 n: src->cardinality * sizeof(uint16_t));
2967
2968 return newcontainer;
2969}
2970
2971int array_container_shrink_to_fit(array_container_t *src) {
2972 if (src->cardinality == src->capacity) return 0; // nothing to do
2973 int savings = src->capacity - src->cardinality;
2974 src->capacity = src->cardinality;
2975 if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs
2976 free(ptr: src->array);
2977 src->array = NULL;
2978 } else {
2979 uint16_t *oldarray = src->array;
2980 src->array =
2981 (uint16_t *)realloc(ptr: oldarray, size: src->capacity * sizeof(uint16_t));
2982 if (src->array == NULL) free(ptr: oldarray); // should never happen?
2983 }
2984 return savings;
2985}
2986
2987/* Free memory. */
2988void array_container_free(array_container_t *arr) {
2989 if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
2990 free(ptr: arr->array);
2991 arr->array = NULL; // pedantic
2992 }
2993 free(ptr: arr);
2994}
2995
2996static inline int32_t grow_capacity(int32_t capacity) {
2997 return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
2998 : capacity < 64 ? capacity * 2
2999 : capacity < 1024 ? capacity * 3 / 2
3000 : capacity * 5 / 4;
3001}
3002
/* Restrict val to [min, max]. The lower bound is tested first, so it wins
 * when the bounds are inverted and val lies below min. */
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
    if (val < min) {
        return min;
    }
    if (val > max) {
        return max;
    }
    return val;
}
3006
3007void array_container_grow(array_container_t *container, int32_t min,
3008 bool preserve) {
3009
3010 int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
3011 int32_t new_capacity = clamp(val: grow_capacity(capacity: container->capacity), min, max);
3012
3013 container->capacity = new_capacity;
3014 uint16_t *array = container->array;
3015
3016 if (preserve) {
3017 container->array =
3018 (uint16_t *)realloc(ptr: array, size: new_capacity * sizeof(uint16_t));
3019 if (container->array == NULL) free(ptr: array);
3020 } else {
3021 // Jon Strabala reports that some tools complain otherwise
3022 if (array != NULL) {
3023 free(ptr: array);
3024 }
3025 container->array = (uint16_t *)malloc(size: new_capacity * sizeof(uint16_t));
3026 }
3027
3028 // handle the case where realloc fails
3029 if (container->array == NULL) {
3030 fprintf(stderr, format: "could not allocate memory\n");
3031 }
3032 assert(container->array != NULL);
3033}
3034
3035/* Copy one container into another. We assume that they are distinct. */
3036void array_container_copy(const array_container_t *src,
3037 array_container_t *dst) {
3038 const int32_t cardinality = src->cardinality;
3039 if (cardinality > dst->capacity) {
3040 array_container_grow(container: dst, min: cardinality, false);
3041 }
3042
3043 dst->cardinality = cardinality;
3044 memcpy(dest: dst->array, src: src->array, n: cardinality * sizeof(uint16_t));
3045}
3046
3047void array_container_add_from_range(array_container_t *arr, uint32_t min,
3048 uint32_t max, uint16_t step) {
3049 for (uint32_t value = min; value < max; value += step) {
3050 array_container_append(arr, pos: value);
3051 }
3052}
3053
3054/* Computes the union of array1 and array2 and write the result to arrayout.
3055 * It is assumed that arrayout is distinct from both array1 and array2.
3056 */
3057void array_container_union(const array_container_t *array_1,
3058 const array_container_t *array_2,
3059 array_container_t *out) {
3060 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
3061 const int32_t max_cardinality = card_1 + card_2;
3062
3063 if (out->capacity < max_cardinality) {
3064 array_container_grow(container: out, min: max_cardinality, false);
3065 }
3066 out->cardinality = (int32_t)fast_union_uint16(set_1: array_1->array, size_1: card_1,
3067 set_2: array_2->array, size_2: card_2, buffer: out->array);
3068
3069}
3070
3071/* Computes the difference of array1 and array2 and write the result
3072 * to array out.
3073 * Array out does not need to be distinct from array_1
3074 */
3075void array_container_andnot(const array_container_t *array_1,
3076 const array_container_t *array_2,
3077 array_container_t *out) {
3078 if (out->capacity < array_1->cardinality)
3079 array_container_grow(container: out, min: array_1->cardinality, false);
3080#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
3081 if((out != array_1) && (out != array_2)) {
3082 out->cardinality =
3083 difference_vector16(array_1->array, array_1->cardinality,
3084 array_2->array, array_2->cardinality, out->array);
3085 } else {
3086 out->cardinality =
3087 difference_uint16(array_1->array, array_1->cardinality, array_2->array,
3088 array_2->cardinality, out->array);
3089 }
3090#else
3091 out->cardinality =
3092 difference_uint16(a1: array_1->array, length1: array_1->cardinality, a2: array_2->array,
3093 length2: array_2->cardinality, a_out: out->array);
3094#endif
3095}
3096
3097/* Computes the symmetric difference of array1 and array2 and write the
3098 * result
3099 * to arrayout.
3100 * It is assumed that arrayout is distinct from both array1 and array2.
3101 */
3102void array_container_xor(const array_container_t *array_1,
3103 const array_container_t *array_2,
3104 array_container_t *out) {
3105 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
3106 const int32_t max_cardinality = card_1 + card_2;
3107 if (out->capacity < max_cardinality) {
3108 array_container_grow(container: out, min: max_cardinality, false);
3109 }
3110
3111#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
3112 out->cardinality =
3113 xor_vector16(array_1->array, array_1->cardinality, array_2->array,
3114 array_2->cardinality, out->array);
3115#else
3116 out->cardinality =
3117 xor_uint16(array_1: array_1->array, card_1: array_1->cardinality, array_2: array_2->array,
3118 card_2: array_2->cardinality, out: out->array);
3119#endif
3120}
3121
/* Smaller of two 32-bit signed integers (b when they are equal). */
static inline int32_t minimum_int32(int32_t a, int32_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
3125
3126/* computes the intersection of array1 and array2 and write the result to
3127 * arrayout.
3128 * It is assumed that arrayout is distinct from both array1 and array2.
3129 * */
3130void array_container_intersection(const array_container_t *array1,
3131 const array_container_t *array2,
3132 array_container_t *out) {
3133 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
3134 min_card = minimum_int32(a: card_1, b: card_2);
3135 const int threshold = 64; // subject to tuning
3136#ifdef USEAVX
3137 if (out->capacity < min_card) {
3138 array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
3139 false);
3140 }
3141#else
3142 if (out->capacity < min_card) {
3143 array_container_grow(container: out, min: min_card, false);
3144 }
3145#endif
3146
3147 if (card_1 * threshold < card_2) {
3148 out->cardinality = intersect_skewed_uint16(
3149 small: array1->array, size_s: card_1, large: array2->array, size_l: card_2, buffer: out->array);
3150 } else if (card_2 * threshold < card_1) {
3151 out->cardinality = intersect_skewed_uint16(
3152 small: array2->array, size_s: card_2, large: array1->array, size_l: card_1, buffer: out->array);
3153 } else {
3154#ifdef USEAVX
3155 out->cardinality = intersect_vector16(
3156 array1->array, card_1, array2->array, card_2, out->array);
3157#else
3158 out->cardinality = intersect_uint16(A: array1->array, lenA: card_1,
3159 B: array2->array, lenB: card_2, out: out->array);
3160#endif
3161 }
3162}
3163
3164/* computes the size of the intersection of array1 and array2
3165 * */
3166int array_container_intersection_cardinality(const array_container_t *array1,
3167 const array_container_t *array2) {
3168 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
3169 const int threshold = 64; // subject to tuning
3170 if (card_1 * threshold < card_2) {
3171 return intersect_skewed_uint16_cardinality(small: array1->array, size_s: card_1,
3172 large: array2->array, size_l: card_2);
3173 } else if (card_2 * threshold < card_1) {
3174 return intersect_skewed_uint16_cardinality(small: array2->array, size_s: card_2,
3175 large: array1->array, size_l: card_1);
3176 } else {
3177#ifdef USEAVX
3178 return intersect_vector16_cardinality(array1->array, card_1,
3179 array2->array, card_2);
3180#else
3181 return intersect_uint16_cardinality(A: array1->array, lenA: card_1,
3182 B: array2->array, lenB: card_2);
3183#endif
3184 }
3185}
3186
3187bool array_container_intersect(const array_container_t *array1,
3188 const array_container_t *array2) {
3189 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
3190 const int threshold = 64; // subject to tuning
3191 if (card_1 * threshold < card_2) {
3192 return intersect_skewed_uint16_nonempty(
3193 small: array1->array, size_s: card_1, large: array2->array, size_l: card_2);
3194 } else if (card_2 * threshold < card_1) {
3195 return intersect_skewed_uint16_nonempty(
3196 small: array2->array, size_s: card_2, large: array1->array, size_l: card_1);
3197 } else {
3198 // we do not bother vectorizing
3199 return intersect_uint16_nonempty(A: array1->array, lenA: card_1,
3200 B: array2->array, lenB: card_2);
3201 }
3202}
3203
3204/* computes the intersection of array1 and array2 and write the result to
3205 * array1.
3206 * */
3207void array_container_intersection_inplace(array_container_t *src_1,
3208 const array_container_t *src_2) {
3209 // todo: can any of this be vectorized?
3210 int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
3211 const int threshold = 64; // subject to tuning
3212 if (card_1 * threshold < card_2) {
3213 src_1->cardinality = intersect_skewed_uint16(
3214 small: src_1->array, size_s: card_1, large: src_2->array, size_l: card_2, buffer: src_1->array);
3215 } else if (card_2 * threshold < card_1) {
3216 src_1->cardinality = intersect_skewed_uint16(
3217 small: src_2->array, size_s: card_2, large: src_1->array, size_l: card_1, buffer: src_1->array);
3218 } else {
3219 src_1->cardinality = intersect_uint16(
3220 A: src_1->array, lenA: card_1, B: src_2->array, lenB: card_2, out: src_1->array);
3221 }
3222}
3223
3224int array_container_to_uint32_array(void *vout, const array_container_t *cont,
3225 uint32_t base) {
3226 int outpos = 0;
3227 uint32_t *out = (uint32_t *)vout;
3228 for (int i = 0; i < cont->cardinality; ++i) {
3229 const uint32_t val = base + cont->array[i];
3230 memcpy(dest: out + outpos, src: &val,
3231 n: sizeof(uint32_t)); // should be compiled as a MOV on x64
3232 outpos++;
3233 }
3234 return outpos;
3235}
3236
3237void array_container_printf(const array_container_t *v) {
3238 if (v->cardinality == 0) {
3239 printf(format: "{}");
3240 return;
3241 }
3242 printf(format: "{");
3243 printf(format: "%d", v->array[0]);
3244 for (int i = 1; i < v->cardinality; ++i) {
3245 printf(format: ",%d", v->array[i]);
3246 }
3247 printf(format: "}");
3248}
3249
3250void array_container_printf_as_uint32_array(const array_container_t *v,
3251 uint32_t base) {
3252 if (v->cardinality == 0) {
3253 return;
3254 }
3255 printf(format: "%u", v->array[0] + base);
3256 for (int i = 1; i < v->cardinality; ++i) {
3257 printf(format: ",%u", v->array[i] + base);
3258 }
3259}
3260
3261/* Compute the number of runs */
3262int32_t array_container_number_of_runs(const array_container_t *a) {
3263 // Can SIMD work here?
3264 int32_t nr_runs = 0;
3265 int32_t prev = -2;
3266 for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) {
3267 if (*p != prev + 1) nr_runs++;
3268 prev = *p;
3269 }
3270 return nr_runs;
3271}
3272
3273int32_t array_container_serialize(const array_container_t *container, char *buf) {
3274 int32_t l, off;
3275 uint16_t cardinality = (uint16_t)container->cardinality;
3276
3277 memcpy(dest: buf, src: &cardinality, n: off = sizeof(cardinality));
3278 l = sizeof(uint16_t) * container->cardinality;
3279 if (l) memcpy(dest: &buf[off], src: container->array, n: l);
3280
3281 return (off + l);
3282}
3283
3284/**
3285 * Writes the underlying array to buf, outputs how many bytes were written.
3286 * The number of bytes written should be
3287 * array_container_size_in_bytes(container).
3288 *
3289 */
3290int32_t array_container_write(const array_container_t *container, char *buf) {
3291 memcpy(dest: buf, src: container->array, n: container->cardinality * sizeof(uint16_t));
3292 return array_container_size_in_bytes(container);
3293}
3294
3295bool array_container_is_subset(const array_container_t *container1,
3296 const array_container_t *container2) {
3297 if (container1->cardinality > container2->cardinality) {
3298 return false;
3299 }
3300 int i1 = 0, i2 = 0;
3301 while (i1 < container1->cardinality && i2 < container2->cardinality) {
3302 if (container1->array[i1] == container2->array[i2]) {
3303 i1++;
3304 i2++;
3305 } else if (container1->array[i1] > container2->array[i2]) {
3306 i2++;
3307 } else { // container1->array[i1] < container2->array[i2]
3308 return false;
3309 }
3310 }
3311 if (i1 == container1->cardinality) {
3312 return true;
3313 } else {
3314 return false;
3315 }
3316}
3317
3318int32_t array_container_read(int32_t cardinality, array_container_t *container,
3319 const char *buf) {
3320 if (container->capacity < cardinality) {
3321 array_container_grow(container, min: cardinality, false);
3322 }
3323 container->cardinality = cardinality;
3324 memcpy(dest: container->array, src: buf, n: container->cardinality * sizeof(uint16_t));
3325
3326 return array_container_size_in_bytes(container);
3327}
3328
3329uint32_t array_container_serialization_len(const array_container_t *container) {
3330 return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ +
3331 (sizeof(uint16_t) * container->cardinality));
3332}
3333
3334void *array_container_deserialize(const char *buf, size_t buf_len) {
3335 array_container_t *ptr;
3336
3337 if (buf_len < 2) /* capacity converted to 16 bit */
3338 return (NULL);
3339 else
3340 buf_len -= 2;
3341
3342 if ((ptr = (array_container_t *)malloc(size: sizeof(array_container_t))) !=
3343 NULL) {
3344 size_t len;
3345 int32_t off;
3346 uint16_t cardinality;
3347
3348 memcpy(dest: &cardinality, src: buf, n: off = sizeof(cardinality));
3349
3350 ptr->capacity = ptr->cardinality = (uint32_t)cardinality;
3351 len = sizeof(uint16_t) * ptr->cardinality;
3352
3353 if (len != buf_len) {
3354 free(ptr: ptr);
3355 return (NULL);
3356 }
3357
3358 if ((ptr->array = (uint16_t *)malloc(size: sizeof(uint16_t) *
3359 ptr->capacity)) == NULL) {
3360 free(ptr: ptr);
3361 return (NULL);
3362 }
3363
3364 if (len) memcpy(dest: ptr->array, src: &buf[off], n: len);
3365
3366 /* Check if returned values are monotonically increasing */
3367 for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) {
3368 if (ptr->array[i] < j) {
3369 free(ptr: ptr->array);
3370 free(ptr: ptr);
3371 return (NULL);
3372 } else
3373 j = ptr->array[i];
3374 }
3375 }
3376
3377 return (ptr);
3378}
3379
3380bool array_container_iterate(const array_container_t *cont, uint32_t base,
3381 roaring_iterator iterator, void *ptr) {
3382 for (int i = 0; i < cont->cardinality; i++)
3383 if (!iterator(cont->array[i] + base, ptr)) return false;
3384 return true;
3385}
3386
3387bool array_container_iterate64(const array_container_t *cont, uint32_t base,
3388 roaring_iterator64 iterator, uint64_t high_bits,
3389 void *ptr) {
3390 for (int i = 0; i < cont->cardinality; i++)
3391 if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr))
3392 return false;
3393 return true;
3394}
3395/* end file src/containers/array.c */
3396/* begin file src/containers/bitset.c */
3397/*
3398 * bitset.c
3399 *
3400 */
3401#ifndef _POSIX_C_SOURCE
3402#define _POSIX_C_SOURCE 200809L
3403#endif
3404#include <assert.h>
3405#include <stdio.h>
3406#include <stdlib.h>
3407#include <string.h>
3408
3409
3410void bitset_container_clear(bitset_container_t *bitset) {
3411 memset(s: bitset->array, c: 0, n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3412 bitset->cardinality = 0;
3413}
3414
3415void bitset_container_set_all(bitset_container_t *bitset) {
3416 memset(s: bitset->array, INT64_C(-1),
3417 n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3418 bitset->cardinality = (1 << 16);
3419}
3420
3421
3422
3423/* Create a new bitset. Return NULL in case of failure. */
3424bitset_container_t *bitset_container_create(void) {
3425 bitset_container_t *bitset =
3426 (bitset_container_t *)malloc(size: sizeof(bitset_container_t));
3427
3428 if (!bitset) {
3429 return NULL;
3430 }
3431 // sizeof(__m256i) == 32
3432 bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc(
3433 alignment: 32, size: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3434 if (!bitset->array) {
3435 free(ptr: bitset);
3436 return NULL;
3437 }
3438 bitset_container_clear(bitset);
3439 return bitset;
3440}
3441
3442/* Copy one container into another. We assume that they are distinct. */
3443void bitset_container_copy(const bitset_container_t *source,
3444 bitset_container_t *dest) {
3445 dest->cardinality = source->cardinality;
3446 memcpy(dest: dest->array, src: source->array,
3447 n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3448}
3449
3450void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
3451 uint32_t max, uint16_t step) {
3452 if (step == 0) return; // refuse to crash
3453 if ((64 % step) == 0) { // step divides 64
3454 uint64_t mask = 0; // construct the repeated mask
3455 for (uint32_t value = (min % step); value < 64; value += step) {
3456 mask |= ((uint64_t)1 << value);
3457 }
3458 uint32_t firstword = min / 64;
3459 uint32_t endword = (max - 1) / 64;
3460 bitset->cardinality = (max - min + step - 1) / step;
3461 if (firstword == endword) {
3462 bitset->array[firstword] |=
3463 mask & (((~UINT64_C(0)) << (min % 64)) &
3464 ((~UINT64_C(0)) >> ((~max + 1) % 64)));
3465 return;
3466 }
3467 bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
3468 for (uint32_t i = firstword + 1; i < endword; i++)
3469 bitset->array[i] = mask;
3470 bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
3471 } else {
3472 for (uint32_t value = min; value < max; value += step) {
3473 bitset_container_add(bitset, pos: value);
3474 }
3475 }
3476}
3477
3478/* Free memory. */
3479void bitset_container_free(bitset_container_t *bitset) {
3480 if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise
3481 roaring_bitmap_aligned_free(memblock: bitset->array);
3482 bitset->array = NULL; // pedantic
3483 }
3484 free(ptr: bitset);
3485}
3486
3487/* duplicate container. */
3488bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
3489 bitset_container_t *bitset =
3490 (bitset_container_t *)malloc(size: sizeof(bitset_container_t));
3491 assert(bitset);
3492
3493 // sizeof(__m256i) == 32
3494 bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc(
3495 alignment: 32, size: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3496 assert(bitset->array);
3497 bitset->cardinality = src->cardinality;
3498 memcpy(dest: bitset->array, src: src->array,
3499 n: sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3500 return bitset;
3501}
3502
3503void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
3504 uint32_t end) {
3505 bitset_set_range(bitmap: bitset->array, start: begin, end);
3506 bitset->cardinality =
3507 bitset_container_compute_cardinality(bitset); // could be smarter
3508}
3509
3510
3511bool bitset_container_intersect(const bitset_container_t *src_1,
3512 const bitset_container_t *src_2) {
3513 // could vectorize, but this is probably already quite fast in practice
3514 const uint64_t * __restrict__ array_1 = src_1->array;
3515 const uint64_t * __restrict__ array_2 = src_2->array;
3516 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
3517 if((array_1[i] & array_2[i]) != 0) return true;
3518 }
3519 return false;
3520}
3521
3522
3523#ifdef USEAVX
3524#ifndef WORDS_IN_AVX2_REG
3525#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
3526#endif
3527/* Get the number of bits set (force computation) */
3528int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
3529 return (int) avx2_harley_seal_popcount256(
3530 (const __m256i *)bitset->array,
3531 BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
3532}
3533
3534#elif defined(USENEON)
/* Get the number of bits set (force computation): NEON variant. Every word
 * of the bitset is recounted; bitset->cardinality is not consulted. */
int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
    /* four independent accumulators, one per 128-bit load of the loop body */
    uint16x8_t n0 = vdupq_n_u16(0);
    uint16x8_t n1 = vdupq_n_u16(0);
    uint16x8_t n2 = vdupq_n_u16(0);
    uint16x8_t n3 = vdupq_n_u16(0);
    /* 8 words (4 x 128 bits) per iteration: per-byte bit counts (vcntq_u8)
     * are pairwise-widened to 16 bits (vpaddlq_u8) and accumulated */
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {
        uint64x2_t c0 = vld1q_u64(&bitset->array[i + 0]);
        n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));
        uint64x2_t c1 = vld1q_u64(&bitset->array[i + 2]);
        n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1))));
        uint64x2_t c2 = vld1q_u64(&bitset->array[i + 4]);
        n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2))));
        uint64x2_t c3 = vld1q_u64(&bitset->array[i + 6]);
        n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3))));
    }
    /* widen the 16-bit partial sums to two 64-bit lanes and add them up */
    uint64x2_t n = vdupq_n_u64(0);
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0)));
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1)));
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2)));
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3)));
    return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);
}
3557
3558#else
3559
3560/* Get the number of bits set (force computation) */
3561int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
3562 const uint64_t *array = bitset->array;
3563 int32_t sum = 0;
3564 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
3565 sum += hamming(x: array[i]);
3566 sum += hamming(x: array[i + 1]);
3567 sum += hamming(x: array[i + 2]);
3568 sum += hamming(x: array[i + 3]);
3569 }
3570 return sum;
3571}
3572
3573#endif
3574
3575#ifdef USEAVX
3576
3577#define BITSET_CONTAINER_FN_REPEAT 8
3578#ifndef WORDS_IN_AVX2_REG
3579#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
3580#endif
3581#define LOOP_SIZE \
3582 BITSET_CONTAINER_SIZE_IN_WORDS / \
3583 ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT)
3584
/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
   result to bitsetout.

   AVX2 flavour of the generator macro. For a given opname it defines:
     - bitset_container_<opname>_nocard(src_1, src_2, dst): applies
       avx_intrinsic to the two word arrays, 8 x 32 bytes per loop iteration,
       stores the result in dst, sets dst->cardinality to
       BITSET_UNKNOWN_CARDINALITY and returns it;
     - bitset_container_<opname>(src_1, src_2, dst): delegates to
       avx2_harley_seal_popcount256andstore_<opname>() and stores its return
       value in dst->cardinality;
     - bitset_container_<opname>_justcard(src_1, src_2): returns the result
       of avx2_harley_seal_popcount256_<opname>() without writing anything.
   The operands are always passed as (src_2 data, src_1 data); presumably
   this matters for the non-commutative andnot -- TODO confirm against the
   intrinsic's documentation.
   opsymbol and neon_intrinsic are not used by this flavour. */
// clang-format off
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)  \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,       \
                                       const bitset_container_t *src_2,       \
                                       bitset_container_t *dst) {             \
    const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array;     \
    const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array;     \
    /* not using the blocking optimization for some reason*/                  \
    uint8_t *out = (uint8_t*)dst->array;                                      \
    const int innerloop = 8;                                                  \
    for (size_t i = 0;                                                        \
        i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG);             \
                                                         i+=innerloop) {\
        __m256i A1, A2, AO;                                                   \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1));                  \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2));                  \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)out, AO);                              \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32));             \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32));             \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+32), AO);                         \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64));             \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64));             \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+64), AO);                         \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96));             \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96));             \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+96), AO);                         \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128));            \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128));            \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+128), AO);                        \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160));            \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160));            \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+160), AO);                        \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192));            \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192));            \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+192), AO);                        \
        A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224));            \
        A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224));            \
        AO = avx_intrinsic(A2, A1);                                           \
        _mm256_storeu_si256((__m256i *)(out+224), AO);                        \
        out+=256;                                                             \
        array_1 += 256;                                                       \
        array_2 += 256;                                                       \
    }                                                                         \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                            \
    return dst->cardinality;                                                  \
}                                                                             \
/* next, a version that updates cardinality*/                                 \
int bitset_container_##opname(const bitset_container_t *src_1,                \
                              const bitset_container_t *src_2,                \
                              bitset_container_t *dst) {                      \
    const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array;    \
    const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array;    \
    __m256i *out = (__m256i *) dst->array;                                    \
    dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\
            array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\
    return dst->cardinality;                                                  \
}                                                                             \
/* next, a version that just computes the cardinality*/                       \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,     \
                              const bitset_container_t *src_2) {              \
    const __m256i * __restrict__ data1 = (const __m256i *) src_1->array;      \
    const __m256i * __restrict__ data2 = (const __m256i *) src_2->array;      \
    return (int)avx2_harley_seal_popcount256_##opname(data2,                  \
            data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\
}
3659
3660#elif defined(USENEON)
3661
/* NEON flavour of the generator macro. For a given opname it defines:
 *   - bitset_container_<opname>(src_1, src_2, dst): applies neon_intrinsic
 *     to the two word arrays 8 words per iteration, stores the result in
 *     dst and sets/returns dst->cardinality as the number of bits in the
 *     result;
 *   - bitset_container_<opname>_nocard(src_1, src_2, dst): same stores, but
 *     dst->cardinality is set to BITSET_UNKNOWN_CARDINALITY;
 *   - bitset_container_<opname>_justcard(src_1, src_2): only counts the bits
 *     of the result and returns that count, nothing is stored.
 * Bit counting uses the same accumulate-and-widen scheme in all functions:
 * four 16-bit accumulators filled inside the loop, reduced to two 64-bit
 * lanes afterwards. opsymbol and avx_intrinsic are not used by this
 * flavour. */
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)  \
int bitset_container_##opname(const bitset_container_t *src_1,                \
                              const bitset_container_t *src_2,                \
                              bitset_container_t *dst) {                      \
    const uint64_t * __restrict__ array_1 = src_1->array;                     \
    const uint64_t * __restrict__ array_2 = src_2->array;                     \
    uint64_t *out = dst->array;                                               \
    uint16x8_t n0 = vdupq_n_u16(0);                                           \
    uint16x8_t n1 = vdupq_n_u16(0);                                           \
    uint16x8_t n2 = vdupq_n_u16(0);                                           \
    uint16x8_t n3 = vdupq_n_u16(0);                                           \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {          \
        uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]),            \
                                       vld1q_u64(&array_2[i + 0]));           \
        n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));   \
        vst1q_u64(&out[i + 0], c0);                                           \
        uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]),            \
                                       vld1q_u64(&array_2[i + 2]));           \
        n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1))));   \
        vst1q_u64(&out[i + 2], c1);                                           \
        uint64x2_t c2 = neon_intrinsic(vld1q_u64(&array_1[i + 4]),            \
                                       vld1q_u64(&array_2[i + 4]));           \
        n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2))));   \
        vst1q_u64(&out[i + 4], c2);                                           \
        uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]),            \
                                       vld1q_u64(&array_2[i + 6]));           \
        n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3))));   \
        vst1q_u64(&out[i + 6], c3);                                           \
    }                                                                         \
    uint64x2_t n = vdupq_n_u64(0);                                            \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3)));                           \
    dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);           \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,       \
                                       const bitset_container_t *src_2,       \
                                             bitset_container_t *dst) {       \
    const uint64_t * __restrict__ array_1 = src_1->array;                     \
    const uint64_t * __restrict__ array_2 = src_2->array;                     \
    uint64_t *out = dst->array;                                               \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {          \
        vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&array_1[i + 0]),     \
                                              vld1q_u64(&array_2[i + 0])));   \
        vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&array_1[i + 2]),     \
                                              vld1q_u64(&array_2[i + 2])));   \
        vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&array_1[i + 4]),     \
                                              vld1q_u64(&array_2[i + 4])));   \
        vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&array_1[i + 6]),     \
                                              vld1q_u64(&array_2[i + 6])));   \
    }                                                                         \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                            \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,     \
                                         const bitset_container_t *src_2) {   \
    const uint64_t * __restrict__ array_1 = src_1->array;                     \
    const uint64_t * __restrict__ array_2 = src_2->array;                     \
    uint16x8_t n0 = vdupq_n_u16(0);                                           \
    uint16x8_t n1 = vdupq_n_u16(0);                                           \
    uint16x8_t n2 = vdupq_n_u16(0);                                           \
    uint16x8_t n3 = vdupq_n_u16(0);                                           \
    for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) {          \
        uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]),            \
                                       vld1q_u64(&array_2[i + 0]));           \
        n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0))));   \
        uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]),            \
                                       vld1q_u64(&array_2[i + 2]));           \
        n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1))));   \
        uint64x2_t c2 = neon_intrinsic(vld1q_u64(&array_1[i + 4]),            \
                                       vld1q_u64(&array_2[i + 4]));           \
        n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2))));   \
        uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]),            \
                                       vld1q_u64(&array_2[i + 6]));           \
        n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3))));   \
    }                                                                         \
    uint64x2_t n = vdupq_n_u64(0);                                            \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2)));                           \
    n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3)));                           \
    return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1);                       \
}
3747
3748#else /* not USEAVX */
3749
/* Portable flavour of the generator macro. For a given opname it defines:
 *   - bitset_container_<opname>(src_1, src_2, dst): combines the words of
 *     both sources with opsymbol, stores them in dst and sets/returns
 *     dst->cardinality as the hamming() total of the result;
 *   - bitset_container_<opname>_nocard(src_1, src_2, dst): same stores, but
 *     dst->cardinality is set to BITSET_UNKNOWN_CARDINALITY;
 *   - bitset_container_<opname>_justcard(src_1, src_2): returns the
 *     hamming() total of the result without storing anything.
 * avx_intrinsic and neon_intrinsic are not used by this flavour. */
#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic)  \
int bitset_container_##opname(const bitset_container_t *src_1,                \
                              const bitset_container_t *src_2,                \
                              bitset_container_t *dst) {                      \
    const uint64_t * __restrict__ lhs = src_1->array;                         \
    const uint64_t * __restrict__ rhs = src_2->array;                         \
    uint64_t *result = dst->array;                                            \
    int32_t bits = 0;                                                         \
    size_t k = 0;                                                             \
    while (k < BITSET_CONTAINER_SIZE_IN_WORDS) {                              \
        const uint64_t even = (lhs[k])opsymbol(rhs[k]);                       \
        const uint64_t odd = (lhs[k + 1])opsymbol(rhs[k + 1]);                \
        result[k] = even;                                                     \
        result[k + 1] = odd;                                                  \
        bits += hamming(even);                                                \
        bits += hamming(odd);                                                 \
        k += 2;                                                               \
    }                                                                         \
    dst->cardinality = bits;                                                  \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,       \
                                       const bitset_container_t *src_2,       \
                                       bitset_container_t *dst) {             \
    const uint64_t * __restrict__ lhs = src_1->array;                         \
    const uint64_t * __restrict__ rhs = src_2->array;                         \
    uint64_t *result = dst->array;                                            \
    size_t k = 0;                                                             \
    while (k < BITSET_CONTAINER_SIZE_IN_WORDS) {                              \
        result[k] = (lhs[k])opsymbol(rhs[k]);                                 \
        k++;                                                                  \
    }                                                                         \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                            \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,     \
                                         const bitset_container_t *src_2) {   \
    const uint64_t * __restrict__ lhs = src_1->array;                         \
    const uint64_t * __restrict__ rhs = src_2->array;                         \
    int32_t bits = 0;                                                         \
    size_t k = 0;                                                             \
    while (k < BITSET_CONTAINER_SIZE_IN_WORDS) {                              \
        const uint64_t even = (lhs[k])opsymbol(rhs[k]);                       \
        const uint64_t odd = (lhs[k + 1])opsymbol(rhs[k + 1]);                \
        bits += hamming(even);                                                \
        bits += hamming(odd);                                                 \
        k += 2;                                                               \
    }                                                                         \
    return bits;                                                              \
}
3794
3795#endif
3796
3797// we duplicate the function because other containers use the "or" term, makes API more consistent
3798BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64)
3799BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64)
3800
// The function is generated under both the "and" and "intersection" names because other containers use the "intersection" term; this keeps the API consistent.
3802BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64)
3803BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64)
3804
3805BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64)
3806BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64)
3807// clang-format On
3808
3809
3810
3811int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) {
3812#ifdef USEAVX2FORDECODING
3813 if(cont->cardinality >= 8192)// heuristic
3814 return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base);
3815 else
3816 return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base);
3817#else
3818 return (int) bitset_extract_setbits(bitset: cont->array, length: BITSET_CONTAINER_SIZE_IN_WORDS, vout,base);
3819#endif
3820}
3821
3822/*
3823 * Print this container using printf (useful for debugging).
3824 */
3825void bitset_container_printf(const bitset_container_t * v) {
3826 printf(format: "{");
3827 uint32_t base = 0;
3828 bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
3829 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
3830 uint64_t w = v->array[i];
3831 while (w != 0) {
3832 uint64_t t = w & (~w + 1);
3833 int r = __builtin_ctzll(w);
3834 if(iamfirst) {// predicted to be false
3835 printf(format: "%u",base + r);
3836 iamfirst = false;
3837 } else {
3838 printf(format: ",%u",base + r);
3839 }
3840 w ^= t;
3841 }
3842 base += 64;
3843 }
3844 printf(format: "}");
3845}
3846
3847
3848/*
3849 * Print this container using printf as a comma-separated list of 32-bit integers starting at base.
3850 */
3851void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
3852 bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
3853 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
3854 uint64_t w = v->array[i];
3855 while (w != 0) {
3856 uint64_t t = w & (~w + 1);
3857 int r = __builtin_ctzll(w);
3858 if(iamfirst) {// predicted to be false
3859 printf(format: "%u", r + base);
3860 iamfirst = false;
3861 } else {
3862 printf(format: ",%u",r + base);
3863 }
3864 w ^= t;
3865 }
3866 base += 64;
3867 }
3868}
3869
3870
3871// TODO: use the fast lower bound, also
3872int bitset_container_number_of_runs(bitset_container_t *b) {
3873 int num_runs = 0;
3874 uint64_t next_word = b->array[0];
3875
3876 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
3877 uint64_t word = next_word;
3878 next_word = b->array[i+1];
3879 num_runs += hamming(x: (~word) & (word << 1)) + ( (word >> 63) & ~next_word);
3880 }
3881
3882 uint64_t word = next_word;
3883 num_runs += hamming(x: (~word) & (word << 1));
3884 if((word & 0x8000000000000000ULL) != 0)
3885 num_runs++;
3886 return num_runs;
3887}
3888
3889int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) {
3890 int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
3891 memcpy(dest: buf, src: container->array, n: l);
3892 return(l);
3893}
3894
3895
3896
3897int32_t bitset_container_write(const bitset_container_t *container,
3898 char *buf) {
3899 memcpy(dest: buf, src: container->array, n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
3900 return bitset_container_size_in_bytes(container);
3901}
3902
3903
3904int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
3905 const char *buf) {
3906 container->cardinality = cardinality;
3907 memcpy(dest: container->array, src: buf, n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
3908 return bitset_container_size_in_bytes(container);
3909}
3910
3911uint32_t bitset_container_serialization_len(void) {
3912 return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3913}
3914
3915void* bitset_container_deserialize(const char *buf, size_t buf_len) {
3916 bitset_container_t *ptr;
3917 size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
3918
3919 if(l != buf_len)
3920 return(NULL);
3921
3922 if((ptr = (bitset_container_t *)malloc(size: sizeof(bitset_container_t))) != NULL) {
3923 memcpy(dest: ptr, src: buf, n: sizeof(bitset_container_t));
3924 // sizeof(__m256i) == 32
3925 ptr->array = (uint64_t *) roaring_bitmap_aligned_malloc(alignment: 32, size: l);
3926 if (! ptr->array) {
3927 free(ptr: ptr);
3928 return NULL;
3929 }
3930 memcpy(dest: ptr->array, src: buf, n: l);
3931 ptr->cardinality = bitset_container_compute_cardinality(bitset: ptr);
3932 }
3933
3934 return((void*)ptr);
3935}
3936
3937bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
3938 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3939 uint64_t w = cont->array[i];
3940 while (w != 0) {
3941 uint64_t t = w & (~w + 1);
3942 int r = __builtin_ctzll(w);
3943 if(!iterator(r + base, ptr)) return false;
3944 w ^= t;
3945 }
3946 base += 64;
3947 }
3948 return true;
3949}
3950
3951bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
3952 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3953 uint64_t w = cont->array[i];
3954 while (w != 0) {
3955 uint64_t t = w & (~w + 1);
3956 int r = __builtin_ctzll(w);
3957 if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
3958 w ^= t;
3959 }
3960 base += 64;
3961 }
3962 return true;
3963}
3964
3965
3966bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
3967 if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
3968 if(container1->cardinality != container2->cardinality) {
3969 return false;
3970 }
3971 if (container1->cardinality == INT32_C(0x10000)) {
3972 return true;
3973 }
3974 }
3975#ifdef USEAVX
3976 const __m256i *ptr1 = (const __m256i*)container1->array;
3977 const __m256i *ptr2 = (const __m256i*)container2->array;
3978 for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) {
3979 __m256i r1 = _mm256_load_si256(ptr1+i);
3980 __m256i r2 = _mm256_load_si256(ptr2+i);
3981 int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
3982 if ((uint32_t)mask != UINT32_MAX) {
3983 return false;
3984 }
3985 }
3986#else
3987 return memcmp(s1: container1->array,
3988 s2: container2->array,
3989 n: BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0;
3990#endif
3991 return true;
3992}
3993
3994bool bitset_container_is_subset(const bitset_container_t *container1,
3995 const bitset_container_t *container2) {
3996 if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
3997 if(container1->cardinality > container2->cardinality) {
3998 return false;
3999 }
4000 }
4001 for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
4002 if((container1->array[i] & container2->array[i]) != container1->array[i]) {
4003 return false;
4004 }
4005 }
4006 return true;
4007}
4008
4009bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) {
4010 int card = bitset_container_cardinality(bitset: container);
4011 if(rank >= *start_rank + card) {
4012 *start_rank += card;
4013 return false;
4014 }
4015 const uint64_t *array = container->array;
4016 int32_t size;
4017 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
4018 size = hamming(x: array[i]);
4019 if(rank <= *start_rank + size) {
4020 uint64_t w = container->array[i];
4021 uint16_t base = i*64;
4022 while (w != 0) {
4023 uint64_t t = w & (~w + 1);
4024 int r = __builtin_ctzll(w);
4025 if(*start_rank == rank) {
4026 *element = r+base;
4027 return true;
4028 }
4029 w ^= t;
4030 *start_rank += 1;
4031 }
4032 }
4033 else
4034 *start_rank += size;
4035 }
4036 assert(false);
4037 __builtin_unreachable();
4038}
4039
4040
4041/* Returns the smallest value (assumes not empty) */
4042uint16_t bitset_container_minimum(const bitset_container_t *container) {
4043 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
4044 uint64_t w = container->array[i];
4045 if (w != 0) {
4046 int r = __builtin_ctzll(w);
4047 return r + i * 64;
4048 }
4049 }
4050 return UINT16_MAX;
4051}
4052
4053/* Returns the largest value (assumes not empty) */
4054uint16_t bitset_container_maximum(const bitset_container_t *container) {
4055 for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
4056 uint64_t w = container->array[i];
4057 if (w != 0) {
4058 int r = __builtin_clzll(w);
4059 return i * 64 + 63 - r;
4060 }
4061 }
4062 return 0;
4063}
4064
4065/* Returns the number of values equal or smaller than x */
4066int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
4067 // credit: aqrit
4068 int sum = 0;
4069 int i = 0;
4070 for (int end = x / 64; i < end; i++){
4071 sum += hamming(x: container->array[i]);
4072 }
4073 uint64_t lastword = container->array[i];
4074 uint64_t lastpos = UINT64_C(1) << (x % 64);
4075 uint64_t mask = lastpos + lastpos - 1; // smear right
4076 sum += hamming(x: lastword & mask);
4077 return sum;
4078}
4079
4080/* Returns the index of the first value equal or larger than x, or -1 */
4081int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
4082 uint32_t x32 = x;
4083 uint32_t k = x32 / 64;
4084 uint64_t word = container->array[k];
4085 const int diff = x32 - k * 64; // in [0,64)
4086 word = (word >> diff) << diff; // a mask is faster, but we don't care
4087 while(word == 0) {
4088 k++;
4089 if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
4090 word = container->array[k];
4091 }
4092 return k * 64 + __builtin_ctzll(word);
4093}
4094/* end file src/containers/bitset.c */
4095/* begin file src/containers/containers.c */
4096
4097
4098void container_free(void *container, uint8_t typecode) {
4099 switch (typecode) {
4100 case BITSET_CONTAINER_TYPE_CODE:
4101 bitset_container_free(bitset: (bitset_container_t *)container);
4102 break;
4103 case ARRAY_CONTAINER_TYPE_CODE:
4104 array_container_free(arr: (array_container_t *)container);
4105 break;
4106 case RUN_CONTAINER_TYPE_CODE:
4107 run_container_free(run: (run_container_t *)container);
4108 break;
4109 case SHARED_CONTAINER_TYPE_CODE:
4110 shared_container_free(container: (shared_container_t *)container);
4111 break;
4112 default:
4113 assert(false);
4114 __builtin_unreachable();
4115 }
4116}
4117
4118void container_printf(const void *container, uint8_t typecode) {
4119 container = container_unwrap_shared(candidate_shared_container: container, type: &typecode);
4120 switch (typecode) {
4121 case BITSET_CONTAINER_TYPE_CODE:
4122 bitset_container_printf(v: (const bitset_container_t *)container);
4123 return;
4124 case ARRAY_CONTAINER_TYPE_CODE:
4125 array_container_printf(v: (const array_container_t *)container);
4126 return;
4127 case RUN_CONTAINER_TYPE_CODE:
4128 run_container_printf(v: (const run_container_t *)container);
4129 return;
4130 default:
4131 __builtin_unreachable();
4132 }
4133}
4134
4135void container_printf_as_uint32_array(const void *container, uint8_t typecode,
4136 uint32_t base) {
4137 container = container_unwrap_shared(candidate_shared_container: container, type: &typecode);
4138 switch (typecode) {
4139 case BITSET_CONTAINER_TYPE_CODE:
4140 bitset_container_printf_as_uint32_array(
4141 v: (const bitset_container_t *)container, base);
4142 return;
4143 case ARRAY_CONTAINER_TYPE_CODE:
4144 array_container_printf_as_uint32_array(
4145 v: (const array_container_t *)container, base);
4146 return;
4147 case RUN_CONTAINER_TYPE_CODE:
4148 run_container_printf_as_uint32_array(
4149 v: (const run_container_t *)container, base);
4150 return;
4151 return;
4152 default:
4153 __builtin_unreachable();
4154 }
4155}
4156
4157int32_t container_serialize(const void *container, uint8_t typecode,
4158 char *buf) {
4159 container = container_unwrap_shared(candidate_shared_container: container, type: &typecode);
4160 switch (typecode) {
4161 case BITSET_CONTAINER_TYPE_CODE:
4162 return (bitset_container_serialize(container: (const bitset_container_t *)container,
4163 buf));
4164 case ARRAY_CONTAINER_TYPE_CODE:
4165 return (
4166 array_container_serialize(container: (const array_container_t *)container, buf));
4167 case RUN_CONTAINER_TYPE_CODE:
4168 return (run_container_serialize(container: (const run_container_t *)container, buf));
4169 default:
4170 assert(0);
4171 __builtin_unreachable();
4172 return (-1);
4173 }
4174}
4175
4176uint32_t container_serialization_len(const void *container, uint8_t typecode) {
4177 container = container_unwrap_shared(candidate_shared_container: container, type: &typecode);
4178 switch (typecode) {
4179 case BITSET_CONTAINER_TYPE_CODE:
4180 return bitset_container_serialization_len();
4181 case ARRAY_CONTAINER_TYPE_CODE:
4182 return array_container_serialization_len(
4183 container: (const array_container_t *)container);
4184 case RUN_CONTAINER_TYPE_CODE:
4185 return run_container_serialization_len(
4186 container: (const run_container_t *)container);
4187 default:
4188 assert(0);
4189 __builtin_unreachable();
4190 return (0);
4191 }
4192}
4193
4194void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) {
4195 switch (typecode) {
4196 case BITSET_CONTAINER_TYPE_CODE:
4197 return (bitset_container_deserialize(buf, buf_len));
4198 case ARRAY_CONTAINER_TYPE_CODE:
4199 return (array_container_deserialize(buf, buf_len));
4200 case RUN_CONTAINER_TYPE_CODE:
4201 return (run_container_deserialize(buf, buf_len));
4202 case SHARED_CONTAINER_TYPE_CODE:
4203 printf(format: "this should never happen.\n");
4204 assert(0);
4205 __builtin_unreachable();
4206 return (NULL);
4207 default:
4208 assert(0);
4209 __builtin_unreachable();
4210 return (NULL);
4211 }
4212}
4213
4214void *get_copy_of_container(void *container, uint8_t *typecode,
4215 bool copy_on_write) {
4216 if (copy_on_write) {
4217 shared_container_t *shared_container;
4218 if (*typecode == SHARED_CONTAINER_TYPE_CODE) {
4219 shared_container = (shared_container_t *)container;
4220 shared_container->counter += 1;
4221 return shared_container;
4222 }
4223 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4224
4225 if ((shared_container = (shared_container_t *)malloc(
4226 size: sizeof(shared_container_t))) == NULL) {
4227 return NULL;
4228 }
4229
4230 shared_container->container = container;
4231 shared_container->typecode = *typecode;
4232
4233 shared_container->counter = 2;
4234 *typecode = SHARED_CONTAINER_TYPE_CODE;
4235
4236 return shared_container;
4237 } // copy_on_write
4238 // otherwise, no copy on write...
4239 const void *actualcontainer =
4240 container_unwrap_shared(candidate_shared_container: (const void *)container, type: typecode);
4241 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4242 return container_clone(container: actualcontainer, typecode: *typecode);
4243}
4244/**
4245 * Copies a container, requires a typecode. This allocates new memory, caller
4246 * is responsible for deallocation.
4247 */
4248void *container_clone(const void *container, uint8_t typecode) {
4249 container = container_unwrap_shared(candidate_shared_container: container, type: &typecode);
4250 switch (typecode) {
4251 case BITSET_CONTAINER_TYPE_CODE:
4252 return bitset_container_clone(src: (const bitset_container_t *)container);
4253 case ARRAY_CONTAINER_TYPE_CODE:
4254 return array_container_clone(src: (const array_container_t *)container);
4255 case RUN_CONTAINER_TYPE_CODE:
4256 return run_container_clone(src: (const run_container_t *)container);
4257 case SHARED_CONTAINER_TYPE_CODE:
4258 printf(format: "shared containers are not clonable\n");
4259 assert(false);
4260 return NULL;
4261 default:
4262 assert(false);
4263 __builtin_unreachable();
4264 return NULL;
4265 }
4266}
4267
4268void *shared_container_extract_copy(shared_container_t *container,
4269 uint8_t *typecode) {
4270 assert(container->counter > 0);
4271 assert(container->typecode != SHARED_CONTAINER_TYPE_CODE);
4272 container->counter--;
4273 *typecode = container->typecode;
4274 void *answer;
4275 if (container->counter == 0) {
4276 answer = container->container;
4277 container->container = NULL; // paranoid
4278 free(ptr: container);
4279 } else {
4280 answer = container_clone(container: container->container, typecode: *typecode);
4281 }
4282 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4283 return answer;
4284}
4285
4286void shared_container_free(shared_container_t *container) {
4287 assert(container->counter > 0);
4288 container->counter--;
4289 if (container->counter == 0) {
4290 assert(container->typecode != SHARED_CONTAINER_TYPE_CODE);
4291 container_free(container: container->container, typecode: container->typecode);
4292 container->container = NULL; // paranoid
4293 free(ptr: container);
4294 }
4295}
4296
4297/* end file src/containers/containers.c */
4298/* begin file src/containers/convert.c */
4299#include <stdio.h>
4300
4301
4302// file contains grubby stuff that must know impl. details of all container
4303// types.
4304bitset_container_t *bitset_container_from_array(const array_container_t *a) {
4305 bitset_container_t *ans = bitset_container_create();
4306 int limit = array_container_cardinality(array: a);
4307 for (int i = 0; i < limit; ++i) bitset_container_set(bitset: ans, pos: a->array[i]);
4308 return ans;
4309}
4310
4311bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
4312 int card = run_container_cardinality(run: arr);
4313 bitset_container_t *answer = bitset_container_create();
4314 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
4315 rle16_t vl = arr->runs[rlepos];
4316 bitset_set_lenrange(bitmap: answer->array, start: vl.value, lenminusone: vl.length);
4317 }
4318 answer->cardinality = card;
4319 return answer;
4320}
4321
4322array_container_t *array_container_from_run(const run_container_t *arr) {
4323 array_container_t *answer =
4324 array_container_create_given_capacity(size: run_container_cardinality(run: arr));
4325 answer->cardinality = 0;
4326 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
4327 int run_start = arr->runs[rlepos].value;
4328 int run_end = run_start + arr->runs[rlepos].length;
4329
4330 for (int run_value = run_start; run_value <= run_end; ++run_value) {
4331 answer->array[answer->cardinality++] = (uint16_t)run_value;
4332 }
4333 }
4334 return answer;
4335}
4336
4337array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
4338 array_container_t *result =
4339 array_container_create_given_capacity(size: bits->cardinality);
4340 result->cardinality = bits->cardinality;
4341 // sse version ends up being slower here
4342 // (bitset_extract_setbits_sse_uint16)
4343 // because of the sparsity of the data
4344 bitset_extract_setbits_uint16(bitset: bits->array, length: BITSET_CONTAINER_SIZE_IN_WORDS,
4345 out: result->array, base: 0);
4346 return result;
4347}
4348
4349/* assumes that container has adequate space. Run from [s,e] (inclusive) */
4350static void add_run(run_container_t *r, int s, int e) {
4351 r->runs[r->n_runs].value = s;
4352 r->runs[r->n_runs].length = e - s;
4353 r->n_runs++;
4354}
4355
4356run_container_t *run_container_from_array(const array_container_t *c) {
4357 int32_t n_runs = array_container_number_of_runs(a: c);
4358 run_container_t *answer = run_container_create_given_capacity(size: n_runs);
4359 int prev = -2;
4360 int run_start = -1;
4361 int32_t card = c->cardinality;
4362 if (card == 0) return answer;
4363 for (int i = 0; i < card; ++i) {
4364 const uint16_t cur_val = c->array[i];
4365 if (cur_val != prev + 1) {
4366 // new run starts; flush old one, if any
4367 if (run_start != -1) add_run(r: answer, s: run_start, e: prev);
4368 run_start = cur_val;
4369 }
4370 prev = c->array[i];
4371 }
4372 // now prev is the last seen value
4373 add_run(r: answer, s: run_start, e: prev);
4374 // assert(run_container_cardinality(answer) == c->cardinality);
4375 return answer;
4376}
4377
4378/**
4379 * Convert the runcontainer to either a Bitmap or an Array Container, depending
4380 * on the cardinality. Frees the container.
4381 * Allocates and returns new container, which caller is responsible for freeing.
4382 * It does not free the run container.
4383 */
4384
4385void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card,
4386 uint8_t *resulttype) {
4387 if (card <= DEFAULT_MAX_SIZE) {
4388 array_container_t *answer = array_container_create_given_capacity(size: card);
4389 answer->cardinality = 0;
4390 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
4391 uint16_t run_start = r->runs[rlepos].value;
4392 uint16_t run_end = run_start + r->runs[rlepos].length;
4393 for (uint16_t run_value = run_start; run_value <= run_end;
4394 ++run_value) {
4395 answer->array[answer->cardinality++] = run_value;
4396 }
4397 }
4398 assert(card == answer->cardinality);
4399 *resulttype = ARRAY_CONTAINER_TYPE_CODE;
4400 //run_container_free(r);
4401 return answer;
4402 }
4403 bitset_container_t *answer = bitset_container_create();
4404 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
4405 uint16_t run_start = r->runs[rlepos].value;
4406 bitset_set_lenrange(bitmap: answer->array, start: run_start, lenminusone: r->runs[rlepos].length);
4407 }
4408 answer->cardinality = card;
4409 *resulttype = BITSET_CONTAINER_TYPE_CODE;
4410 //run_container_free(r);
4411 return answer;
4412}
4413
4414/* Converts a run container to either an array or a bitset, IF it saves space.
4415 */
4416/* If a conversion occurs, the caller is responsible to free the original
4417 * container and
4418 * he becomes responsible to free the new one. */
4419void *convert_run_to_efficient_container(run_container_t *c,
4420 uint8_t *typecode_after) {
4421 int32_t size_as_run_container =
4422 run_container_serialized_size_in_bytes(num_runs: c->n_runs);
4423
4424 int32_t size_as_bitset_container =
4425 bitset_container_serialized_size_in_bytes();
4426 int32_t card = run_container_cardinality(run: c);
4427 int32_t size_as_array_container =
4428 array_container_serialized_size_in_bytes(card);
4429
4430 int32_t min_size_non_run =
4431 size_as_bitset_container < size_as_array_container
4432 ? size_as_bitset_container
4433 : size_as_array_container;
4434 if (size_as_run_container <= min_size_non_run) { // no conversion
4435 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4436 return c;
4437 }
4438 if (card <= DEFAULT_MAX_SIZE) {
4439 // to array
4440 array_container_t *answer = array_container_create_given_capacity(size: card);
4441 answer->cardinality = 0;
4442 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
4443 int run_start = c->runs[rlepos].value;
4444 int run_end = run_start + c->runs[rlepos].length;
4445
4446 for (int run_value = run_start; run_value <= run_end; ++run_value) {
4447 answer->array[answer->cardinality++] = (uint16_t)run_value;
4448 }
4449 }
4450 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
4451 return answer;
4452 }
4453
4454 // else to bitset
4455 bitset_container_t *answer = bitset_container_create();
4456
4457 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
4458 int start = c->runs[rlepos].value;
4459 int end = start + c->runs[rlepos].length;
4460 bitset_set_range(bitmap: answer->array, start, end: end + 1);
4461 }
4462 answer->cardinality = card;
4463 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
4464 return answer;
4465}
4466
4467// like convert_run_to_efficient_container but frees the old result if needed
4468void *convert_run_to_efficient_container_and_free(run_container_t *c,
4469 uint8_t *typecode_after) {
4470 void *answer = convert_run_to_efficient_container(c, typecode_after);
4471 if (answer != c) run_container_free(run: c);
4472 return answer;
4473}
4474
4475/* once converted, the original container is disposed here, rather than
4476 in roaring_array
4477*/
4478
4479// TODO: split into run- array- and bitset- subfunctions for sanity;
4480// a few function calls won't really matter.
4481
4482void *convert_run_optimize(void *c, uint8_t typecode_original,
4483 uint8_t *typecode_after) {
4484 if (typecode_original == RUN_CONTAINER_TYPE_CODE) {
4485 void *newc = convert_run_to_efficient_container(c: (run_container_t *)c,
4486 typecode_after);
4487 if (newc != c) {
4488 container_free(container: c, typecode: typecode_original);
4489 }
4490 return newc;
4491 } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) {
4492 // it might need to be converted to a run container.
4493 array_container_t *c_qua_array = (array_container_t *)c;
4494 int32_t n_runs = array_container_number_of_runs(a: c_qua_array);
4495 int32_t size_as_run_container =
4496 run_container_serialized_size_in_bytes(num_runs: n_runs);
4497 int32_t card = array_container_cardinality(array: c_qua_array);
4498 int32_t size_as_array_container =
4499 array_container_serialized_size_in_bytes(card);
4500
4501 if (size_as_run_container >= size_as_array_container) {
4502 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
4503 return c;
4504 }
4505 // else convert array to run container
4506 run_container_t *answer = run_container_create_given_capacity(size: n_runs);
4507 int prev = -2;
4508 int run_start = -1;
4509
4510 assert(card > 0);
4511 for (int i = 0; i < card; ++i) {
4512 uint16_t cur_val = c_qua_array->array[i];
4513 if (cur_val != prev + 1) {
4514 // new run starts; flush old one, if any
4515 if (run_start != -1) add_run(r: answer, s: run_start, e: prev);
4516 run_start = cur_val;
4517 }
4518 prev = c_qua_array->array[i];
4519 }
4520 assert(run_start >= 0);
4521 // now prev is the last seen value
4522 add_run(r: answer, s: run_start, e: prev);
4523 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4524 array_container_free(arr: c_qua_array);
4525 return answer;
4526 } else if (typecode_original ==
4527 BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset
4528 // does bitset need conversion to run?
4529 bitset_container_t *c_qua_bitset = (bitset_container_t *)c;
4530 int32_t n_runs = bitset_container_number_of_runs(b: c_qua_bitset);
4531 int32_t size_as_run_container =
4532 run_container_serialized_size_in_bytes(num_runs: n_runs);
4533 int32_t size_as_bitset_container =
4534 bitset_container_serialized_size_in_bytes();
4535
4536 if (size_as_bitset_container <= size_as_run_container) {
4537 // no conversion needed.
4538 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
4539 return c;
4540 }
4541 // bitset to runcontainer (ported from Java RunContainer(
4542 // BitmapContainer bc, int nbrRuns))
4543 assert(n_runs > 0); // no empty bitmaps
4544 run_container_t *answer = run_container_create_given_capacity(size: n_runs);
4545
4546 int long_ctr = 0;
4547 uint64_t cur_word = c_qua_bitset->array[0];
4548 int run_count = 0;
4549 while (true) {
4550 while (cur_word == UINT64_C(0) &&
4551 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
4552 cur_word = c_qua_bitset->array[++long_ctr];
4553
4554 if (cur_word == UINT64_C(0)) {
4555 bitset_container_free(bitset: c_qua_bitset);
4556 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4557 return answer;
4558 }
4559
4560 int local_run_start = __builtin_ctzll(cur_word);
4561 int run_start = local_run_start + 64 * long_ctr;
4562 uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
4563
4564 int run_end = 0;
4565 while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
4566 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
4567 cur_word_with_1s = c_qua_bitset->array[++long_ctr];
4568
4569 if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
4570 run_end = 64 + long_ctr * 64; // exclusive, I guess
4571 add_run(r: answer, s: run_start, e: run_end - 1);
4572 bitset_container_free(bitset: c_qua_bitset);
4573 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4574 return answer;
4575 }
4576 int local_run_end = __builtin_ctzll(~cur_word_with_1s);
4577 run_end = local_run_end + long_ctr * 64;
4578 add_run(r: answer, s: run_start, e: run_end - 1);
4579 run_count++;
4580 cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
4581 }
4582 return answer;
4583 } else {
4584 assert(false);
4585 __builtin_unreachable();
4586 return NULL;
4587 }
4588}
4589
4590bitset_container_t *bitset_container_from_run_range(const run_container_t *run,
4591 uint32_t min, uint32_t max) {
4592 bitset_container_t *bitset = bitset_container_create();
4593 int32_t union_cardinality = 0;
4594 for (int32_t i = 0; i < run->n_runs; ++i) {
4595 uint32_t rle_min = run->runs[i].value;
4596 uint32_t rle_max = rle_min + run->runs[i].length;
4597 bitset_set_lenrange(bitmap: bitset->array, start: rle_min, lenminusone: rle_max - rle_min);
4598 union_cardinality += run->runs[i].length + 1;
4599 }
4600 union_cardinality += max - min + 1;
4601 union_cardinality -= bitset_lenrange_cardinality(bitmap: bitset->array, start: min, lenminusone: max-min);
4602 bitset_set_lenrange(bitmap: bitset->array, start: min, lenminusone: max - min);
4603 bitset->cardinality = union_cardinality;
4604 return bitset;
4605}
4606/* end file src/containers/convert.c */
4607/* begin file src/containers/mixed_andnot.c */
4608/*
4609 * mixed_andnot.c. More methods since operation is not symmetric,
4610 * except no "wide" andnot , so no lazy options motivated.
4611 */
4612
4613#include <assert.h>
4614#include <string.h>
4615
4616
4617/* Compute the andnot of src_1 and src_2 and write the result to
4618 * dst, a valid array container that could be the same as dst.*/
4619void array_bitset_container_andnot(const array_container_t *src_1,
4620 const bitset_container_t *src_2,
4621 array_container_t *dst) {
4622 // follows Java implementation as of June 2016
4623 if (dst->capacity < src_1->cardinality) {
4624 array_container_grow(container: dst, min: src_1->cardinality, false);
4625 }
4626 int32_t newcard = 0;
4627 const int32_t origcard = src_1->cardinality;
4628 for (int i = 0; i < origcard; ++i) {
4629 uint16_t key = src_1->array[i];
4630 dst->array[newcard] = key;
4631 newcard += 1 - bitset_container_contains(bitset: src_2, pos: key);
4632 }
4633 dst->cardinality = newcard;
4634}
4635
4636/* Compute the andnot of src_1 and src_2 and write the result to
4637 * src_1 */
4638
4639void array_bitset_container_iandnot(array_container_t *src_1,
4640 const bitset_container_t *src_2) {
4641 array_bitset_container_andnot(src_1, src_2, dst: src_1);
4642}
4643
4644/* Compute the andnot of src_1 and src_2 and write the result to
4645 * dst, which does not initially have a valid container.
4646 * Return true for a bitset result; false for array
4647 */
4648
4649bool bitset_array_container_andnot(const bitset_container_t *src_1,
4650 const array_container_t *src_2, void **dst) {
4651 // Java did this directly, but we have option of asm or avx
4652 bitset_container_t *result = bitset_container_create();
4653 bitset_container_copy(source: src_1, dest: result);
4654 result->cardinality =
4655 (int32_t)bitset_clear_list(bitset: result->array, card: (uint64_t)result->cardinality,
4656 list: src_2->array, length: (uint64_t)src_2->cardinality);
4657
4658 // do required type conversions.
4659 if (result->cardinality <= DEFAULT_MAX_SIZE) {
4660 *dst = array_container_from_bitset(bits: result);
4661 bitset_container_free(bitset: result);
4662 return false;
4663 }
4664 *dst = result;
4665 return true;
4666}
4667
4668/* Compute the andnot of src_1 and src_2 and write the result to
4669 * dst (which has no container initially). It will modify src_1
4670 * to be dst if the result is a bitset. Otherwise, it will
4671 * free src_1 and dst will be a new array container. In both
4672 * cases, the caller is responsible for deallocating dst.
4673 * Returns true iff dst is a bitset */
4674
4675bool bitset_array_container_iandnot(bitset_container_t *src_1,
4676 const array_container_t *src_2,
4677 void **dst) {
4678 *dst = src_1;
4679 src_1->cardinality =
4680 (int32_t)bitset_clear_list(bitset: src_1->array, card: (uint64_t)src_1->cardinality,
4681 list: src_2->array, length: (uint64_t)src_2->cardinality);
4682
4683 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
4684 *dst = array_container_from_bitset(bits: src_1);
4685 bitset_container_free(bitset: src_1);
4686 return false; // not bitset
4687 } else
4688 return true;
4689}
4690
4691/* Compute the andnot of src_1 and src_2 and write the result to
4692 * dst. Result may be either a bitset or an array container
4693 * (returns "result is bitset"). dst does not initially have
4694 * any container, but becomes either a bitset container (return
4695 * result true) or an array container.
4696 */
4697
4698bool run_bitset_container_andnot(const run_container_t *src_1,
4699 const bitset_container_t *src_2, void **dst) {
4700 // follows the Java implementation as of June 2016
4701 int card = run_container_cardinality(run: src_1);
4702 if (card <= DEFAULT_MAX_SIZE) {
4703 // must be an array
4704 array_container_t *answer = array_container_create_given_capacity(size: card);
4705 answer->cardinality = 0;
4706 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
4707 rle16_t rle = src_1->runs[rlepos];
4708 for (int run_value = rle.value; run_value <= rle.value + rle.length;
4709 ++run_value) {
4710 if (!bitset_container_get(bitset: src_2, pos: (uint16_t)run_value)) {
4711 answer->array[answer->cardinality++] = (uint16_t)run_value;
4712 }
4713 }
4714 }
4715 *dst = answer;
4716 return false;
4717 } else { // we guess it will be a bitset, though have to check guess when
4718 // done
4719 bitset_container_t *answer = bitset_container_clone(src: src_2);
4720
4721 uint32_t last_pos = 0;
4722 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
4723 rle16_t rle = src_1->runs[rlepos];
4724
4725 uint32_t start = rle.value;
4726 uint32_t end = start + rle.length + 1;
4727 bitset_reset_range(bitmap: answer->array, start: last_pos, end: start);
4728 bitset_flip_range(bitmap: answer->array, start, end);
4729 last_pos = end;
4730 }
4731 bitset_reset_range(bitmap: answer->array, start: last_pos, end: (uint32_t)(1 << 16));
4732
4733 answer->cardinality = bitset_container_compute_cardinality(bitset: answer);
4734
4735 if (answer->cardinality <= DEFAULT_MAX_SIZE) {
4736 *dst = array_container_from_bitset(bits: answer);
4737 bitset_container_free(bitset: answer);
4738 return false; // not bitset
4739 }
4740 *dst = answer;
4741 return true; // bitset
4742 }
4743}
4744
4745/* Compute the andnot of src_1 and src_2 and write the result to
4746 * dst. Result may be either a bitset or an array container
4747 * (returns "result is bitset"). dst does not initially have
4748 * any container, but becomes either a bitset container (return
4749 * result true) or an array container.
4750 */
4751
4752bool run_bitset_container_iandnot(run_container_t *src_1,
4753 const bitset_container_t *src_2, void **dst) {
4754 // dummy implementation
4755 bool ans = run_bitset_container_andnot(src_1, src_2, dst);
4756 run_container_free(run: src_1);
4757 return ans;
4758}
4759
4760/* Compute the andnot of src_1 and src_2 and write the result to
4761 * dst. Result may be either a bitset or an array container
4762 * (returns "result is bitset"). dst does not initially have
4763 * any container, but becomes either a bitset container (return
4764 * result true) or an array container.
4765 */
4766
4767bool bitset_run_container_andnot(const bitset_container_t *src_1,
4768 const run_container_t *src_2, void **dst) {
4769 // follows Java implementation
4770 bitset_container_t *result = bitset_container_create();
4771
4772 bitset_container_copy(source: src_1, dest: result);
4773 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
4774 rle16_t rle = src_2->runs[rlepos];
4775 bitset_reset_range(bitmap: result->array, start: rle.value,
4776 end: rle.value + rle.length + UINT32_C(1));
4777 }
4778 result->cardinality = bitset_container_compute_cardinality(bitset: result);
4779
4780 if (result->cardinality <= DEFAULT_MAX_SIZE) {
4781 *dst = array_container_from_bitset(bits: result);
4782 bitset_container_free(bitset: result);
4783 return false; // not bitset
4784 }
4785 *dst = result;
4786 return true; // bitset
4787}
4788
4789/* Compute the andnot of src_1 and src_2 and write the result to
4790 * dst (which has no container initially). It will modify src_1
4791 * to be dst if the result is a bitset. Otherwise, it will
4792 * free src_1 and dst will be a new array container. In both
4793 * cases, the caller is responsible for deallocating dst.
4794 * Returns true iff dst is a bitset */
4795
4796bool bitset_run_container_iandnot(bitset_container_t *src_1,
4797 const run_container_t *src_2, void **dst) {
4798 *dst = src_1;
4799
4800 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
4801 rle16_t rle = src_2->runs[rlepos];
4802 bitset_reset_range(bitmap: src_1->array, start: rle.value,
4803 end: rle.value + rle.length + UINT32_C(1));
4804 }
4805 src_1->cardinality = bitset_container_compute_cardinality(bitset: src_1);
4806
4807 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
4808 *dst = array_container_from_bitset(bits: src_1);
4809 bitset_container_free(bitset: src_1);
4810 return false; // not bitset
4811 } else
4812 return true;
4813}
4814
4815/* helper. a_out must be a valid array container with adequate capacity.
4816 * Returns the cardinality of the output container. Partly Based on Java
4817 * implementation Util.unsignedDifference.
4818 *
4819 * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
4820 * to avoid advanceUntil?
4821 */
4822
4823static int run_array_array_subtract(const run_container_t *r,
4824 const array_container_t *a_in,
4825 array_container_t *a_out) {
4826 int out_card = 0;
4827 int32_t in_array_pos =
4828 -1; // since advanceUntil always assumes we start the search AFTER this
4829
4830 for (int rlepos = 0; rlepos < r->n_runs; rlepos++) {
4831 int32_t start = r->runs[rlepos].value;
4832 int32_t end = start + r->runs[rlepos].length + 1;
4833
4834 in_array_pos = advanceUntil(array: a_in->array, pos: in_array_pos,
4835 length: a_in->cardinality, min: (uint16_t)start);
4836
4837 if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
4838 for (int32_t i = start; i < end; ++i)
4839 a_out->array[out_card++] = (uint16_t)i;
4840 } else {
4841 uint16_t next_nonincluded = a_in->array[in_array_pos];
4842 if (next_nonincluded >= end) {
4843 // another case when run goes unaltered
4844 for (int32_t i = start; i < end; ++i)
4845 a_out->array[out_card++] = (uint16_t)i;
4846 in_array_pos--; // ensure we see this item again if necessary
4847 } else {
4848 for (int32_t i = start; i < end; ++i)
4849 if (i != next_nonincluded)
4850 a_out->array[out_card++] = (uint16_t)i;
4851 else // 0 should ensure we don't match
4852 next_nonincluded =
4853 (in_array_pos + 1 >= a_in->cardinality)
4854 ? 0
4855 : a_in->array[++in_array_pos];
4856 in_array_pos--; // see again
4857 }
4858 }
4859 }
4860 return out_card;
4861}
4862
4863/* dst does not indicate a valid container initially. Eventually it
4864 * can become any type of container.
4865 */
4866
4867int run_array_container_andnot(const run_container_t *src_1,
4868 const array_container_t *src_2, void **dst) {
4869 // follows the Java impl as of June 2016
4870
4871 int card = run_container_cardinality(run: src_1);
4872 const int arbitrary_threshold = 32;
4873
4874 if (card <= arbitrary_threshold) {
4875 if (src_2->cardinality == 0) {
4876 *dst = run_container_clone(src: src_1);
4877 return RUN_CONTAINER_TYPE_CODE;
4878 }
4879 // Java's "lazyandNot.toEfficientContainer" thing
4880 run_container_t *answer = run_container_create_given_capacity(
4881 size: card + array_container_cardinality(array: src_2));
4882
4883 int rlepos = 0;
4884 int xrlepos = 0; // "x" is src_2
4885 rle16_t rle = src_1->runs[rlepos];
4886 int32_t start = rle.value;
4887 int32_t end = start + rle.length + 1;
4888 int32_t xstart = src_2->array[xrlepos];
4889
4890 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
4891 if (end <= xstart) {
4892 // output the first run
4893 answer->runs[answer->n_runs++] =
4894 (rle16_t){.value = (uint16_t)start,
4895 .length = (uint16_t)(end - start - 1)};
4896 rlepos++;
4897 if (rlepos < src_1->n_runs) {
4898 start = src_1->runs[rlepos].value;
4899 end = start + src_1->runs[rlepos].length + 1;
4900 }
4901 } else if (xstart + 1 <= start) {
4902 // exit the second run
4903 xrlepos++;
4904 if (xrlepos < src_2->cardinality) {
4905 xstart = src_2->array[xrlepos];
4906 }
4907 } else {
4908 if (start < xstart) {
4909 answer->runs[answer->n_runs++] =
4910 (rle16_t){.value = (uint16_t)start,
4911 .length = (uint16_t)(xstart - start - 1)};
4912 }
4913 if (xstart + 1 < end) {
4914 start = xstart + 1;
4915 } else {
4916 rlepos++;
4917 if (rlepos < src_1->n_runs) {
4918 start = src_1->runs[rlepos].value;
4919 end = start + src_1->runs[rlepos].length + 1;
4920 }
4921 }
4922 }
4923 }
4924 if (rlepos < src_1->n_runs) {
4925 answer->runs[answer->n_runs++] =
4926 (rle16_t){.value = (uint16_t)start,
4927 .length = (uint16_t)(end - start - 1)};
4928 rlepos++;
4929 if (rlepos < src_1->n_runs) {
4930 memcpy(dest: answer->runs + answer->n_runs, src: src_1->runs + rlepos,
4931 n: (src_1->n_runs - rlepos) * sizeof(rle16_t));
4932 answer->n_runs += (src_1->n_runs - rlepos);
4933 }
4934 }
4935 uint8_t return_type;
4936 *dst = convert_run_to_efficient_container(c: answer, typecode_after: &return_type);
4937 if (answer != *dst) run_container_free(run: answer);
4938 return return_type;
4939 }
4940 // else it's a bitmap or array
4941
4942 if (card <= DEFAULT_MAX_SIZE) {
4943 array_container_t *ac = array_container_create_given_capacity(size: card);
4944 // nb Java code used a generic iterator-based merge to compute
4945 // difference
4946 ac->cardinality = run_array_array_subtract(r: src_1, a_in: src_2, a_out: ac);
4947 *dst = ac;
4948 return ARRAY_CONTAINER_TYPE_CODE;
4949 }
4950 bitset_container_t *ans = bitset_container_from_run(arr: src_1);
4951 bool result_is_bitset = bitset_array_container_iandnot(src_1: ans, src_2, dst);
4952 return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE
4953 : ARRAY_CONTAINER_TYPE_CODE);
4954}
4955
4956/* Compute the andnot of src_1 and src_2 and write the result to
4957 * dst (which has no container initially). It will modify src_1
4958 * to be dst if the result is a bitset. Otherwise, it will
4959 * free src_1 and dst will be a new array container. In both
4960 * cases, the caller is responsible for deallocating dst.
4961 * Returns true iff dst is a bitset */
4962
4963int run_array_container_iandnot(run_container_t *src_1,
4964 const array_container_t *src_2, void **dst) {
4965 // dummy implementation same as June 2016 Java
4966 int ans = run_array_container_andnot(src_1, src_2, dst);
4967 run_container_free(run: src_1);
4968 return ans;
4969}
4970
4971/* dst must be a valid array container, allowed to be src_1 */
4972
4973void array_run_container_andnot(const array_container_t *src_1,
4974 const run_container_t *src_2,
4975 array_container_t *dst) {
4976 // basically following Java impl as of June 2016
4977 if (src_1->cardinality > dst->capacity) {
4978 array_container_grow(container: dst, min: src_1->cardinality, false);
4979 }
4980
4981 if (src_2->n_runs == 0) {
4982 memmove(dest: dst->array, src: src_1->array,
4983 n: sizeof(uint16_t) * src_1->cardinality);
4984 dst->cardinality = src_1->cardinality;
4985 return;
4986 }
4987 int32_t run_start = src_2->runs[0].value;
4988 int32_t run_end = run_start + src_2->runs[0].length;
4989 int which_run = 0;
4990
4991 uint16_t val = 0;
4992 int dest_card = 0;
4993 for (int i = 0; i < src_1->cardinality; ++i) {
4994 val = src_1->array[i];
4995 if (val < run_start)
4996 dst->array[dest_card++] = val;
4997 else if (val <= run_end) {
4998 ; // omitted item
4999 } else {
5000 do {
5001 if (which_run + 1 < src_2->n_runs) {
5002 ++which_run;
5003 run_start = src_2->runs[which_run].value;
5004 run_end = run_start + src_2->runs[which_run].length;
5005
5006 } else
5007 run_start = run_end = (1 << 16) + 1;
5008 } while (val > run_end);
5009 --i;
5010 }
5011 }
5012 dst->cardinality = dest_card;
5013}
5014
5015/* dst does not indicate a valid container initially. Eventually it
5016 * can become any kind of container.
5017 */
5018
5019void array_run_container_iandnot(array_container_t *src_1,
5020 const run_container_t *src_2) {
5021 array_run_container_andnot(src_1, src_2, dst: src_1);
5022}
5023
5024/* dst does not indicate a valid container initially. Eventually it
5025 * can become any kind of container.
5026 */
5027
5028int run_run_container_andnot(const run_container_t *src_1,
5029 const run_container_t *src_2, void **dst) {
5030 run_container_t *ans = run_container_create();
5031 run_container_andnot(src_1, src_2, dst: ans);
5032 uint8_t typecode_after;
5033 *dst = convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after);
5034 return typecode_after;
5035}
5036
5037/* Compute the andnot of src_1 and src_2 and write the result to
5038 * dst (which has no container initially). It will modify src_1
5039 * to be dst if the result is a bitset. Otherwise, it will
5040 * free src_1 and dst will be a new array container. In both
5041 * cases, the caller is responsible for deallocating dst.
5042 * Returns true iff dst is a bitset */
5043
5044int run_run_container_iandnot(run_container_t *src_1,
5045 const run_container_t *src_2, void **dst) {
5046 // following Java impl as of June 2016 (dummy)
5047 int ans = run_run_container_andnot(src_1, src_2, dst);
5048 run_container_free(run: src_1);
5049 return ans;
5050}
5051
5052/*
5053 * dst is a valid array container and may be the same as src_1
5054 */
5055
5056void array_array_container_andnot(const array_container_t *src_1,
5057 const array_container_t *src_2,
5058 array_container_t *dst) {
5059 array_container_andnot(array_1: src_1, array_2: src_2, out: dst);
5060}
5061
5062/* inplace array-array andnot will always be able to reuse the space of
5063 * src_1 */
5064void array_array_container_iandnot(array_container_t *src_1,
5065 const array_container_t *src_2) {
5066 array_container_andnot(array_1: src_1, array_2: src_2, out: src_1);
5067}
5068
5069/* Compute the andnot of src_1 and src_2 and write the result to
5070 * dst (which has no container initially). Return value is
5071 * "dst is a bitset"
5072 */
5073
5074bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
5075 const bitset_container_t *src_2,
5076 void **dst) {
5077 bitset_container_t *ans = bitset_container_create();
5078 int card = bitset_container_andnot(src_1, src_2, dst: ans);
5079 if (card <= DEFAULT_MAX_SIZE) {
5080 *dst = array_container_from_bitset(bits: ans);
5081 bitset_container_free(bitset: ans);
5082 return false; // not bitset
5083 } else {
5084 *dst = ans;
5085 return true;
5086 }
5087}
5088
5089/* Compute the andnot of src_1 and src_2 and write the result to
5090 * dst (which has no container initially). It will modify src_1
5091 * to be dst if the result is a bitset. Otherwise, it will
5092 * free src_1 and dst will be a new array container. In both
5093 * cases, the caller is responsible for deallocating dst.
5094 * Returns true iff dst is a bitset */
5095
5096bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
5097 const bitset_container_t *src_2,
5098 void **dst) {
5099 int card = bitset_container_andnot(src_1, src_2, dst: src_1);
5100 if (card <= DEFAULT_MAX_SIZE) {
5101 *dst = array_container_from_bitset(bits: src_1);
5102 bitset_container_free(bitset: src_1);
5103 return false; // not bitset
5104 } else {
5105 *dst = src_1;
5106 return true;
5107 }
5108}
5109/* end file src/containers/mixed_andnot.c */
5110/* begin file src/containers/mixed_equal.c */
5111
5112bool array_container_equal_bitset(const array_container_t* container1,
5113 const bitset_container_t* container2) {
5114 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5115 if (container2->cardinality != container1->cardinality) {
5116 return false;
5117 }
5118 }
5119 int32_t pos = 0;
5120 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
5121 uint64_t w = container2->array[i];
5122 while (w != 0) {
5123 uint64_t t = w & (~w + 1);
5124 uint16_t r = i * 64 + __builtin_ctzll(w);
5125 if (pos >= container1->cardinality) {
5126 return false;
5127 }
5128 if (container1->array[pos] != r) {
5129 return false;
5130 }
5131 ++pos;
5132 w ^= t;
5133 }
5134 }
5135 return (pos == container1->cardinality);
5136}
5137
5138bool run_container_equals_array(const run_container_t* container1,
5139 const array_container_t* container2) {
5140 if (run_container_cardinality(run: container1) != container2->cardinality)
5141 return false;
5142 int32_t pos = 0;
5143 for (int i = 0; i < container1->n_runs; ++i) {
5144 const uint32_t run_start = container1->runs[i].value;
5145 const uint32_t le = container1->runs[i].length;
5146
5147 if (container2->array[pos] != run_start) {
5148 return false;
5149 }
5150
5151 if (container2->array[pos + le] != run_start + le) {
5152 return false;
5153 }
5154
5155 pos += le + 1;
5156 }
5157 return true;
5158}
5159
5160bool run_container_equals_bitset(const run_container_t* container1,
5161 const bitset_container_t* container2) {
5162
5163 int run_card = run_container_cardinality(run: container1);
5164 int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ?
5165 container2->cardinality :
5166 bitset_container_compute_cardinality(bitset: container2);
5167 if (bitset_card != run_card) {
5168 return false;
5169 }
5170
5171 for (int32_t i = 0; i < container1->n_runs; i++) {
5172 uint32_t begin = container1->runs[i].value;
5173 if (container1->runs[i].length) {
5174 uint32_t end = begin + container1->runs[i].length + 1;
5175 if (!bitset_container_contains_range(bitset: container2, pos_start: begin, pos_end: end)) {
5176 return false;
5177 }
5178 } else {
5179 if (!bitset_container_contains(bitset: container2, pos: begin)) {
5180 return false;
5181 }
5182 }
5183 }
5184
5185 return true;
5186}
5187/* end file src/containers/mixed_equal.c */
5188/* begin file src/containers/mixed_intersection.c */
5189/*
5190 * mixed_intersection.c
5191 *
5192 */
5193
5194
5195/* Compute the intersection of src_1 and src_2 and write the result to
5196 * dst. */
5197void array_bitset_container_intersection(const array_container_t *src_1,
5198 const bitset_container_t *src_2,
5199 array_container_t *dst) {
5200 if (dst->capacity < src_1->cardinality) {
5201 array_container_grow(container: dst, min: src_1->cardinality, false);
5202 }
5203 int32_t newcard = 0; // dst could be src_1
5204 const int32_t origcard = src_1->cardinality;
5205 for (int i = 0; i < origcard; ++i) {
5206 uint16_t key = src_1->array[i];
5207 // this branchless approach is much faster...
5208 dst->array[newcard] = key;
5209 newcard += bitset_container_contains(bitset: src_2, pos: key);
5210 /**
5211 * we could do it this way instead...
5212 * if (bitset_container_contains(src_2, key)) {
5213 * dst->array[newcard++] = key;
5214 * }
5215 * but if the result is unpredictable, the processor generates
5216 * many mispredicted branches.
5217 * Difference can be huge (from 3 cycles when predictable all the way
5218 * to 16 cycles when unpredictable.
5219 * See
5220 * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c
5221 */
5222 }
5223 dst->cardinality = newcard;
5224}
5225
5226/* Compute the size of the intersection of src_1 and src_2. */
5227int array_bitset_container_intersection_cardinality(
5228 const array_container_t *src_1, const bitset_container_t *src_2) {
5229 int32_t newcard = 0;
5230 const int32_t origcard = src_1->cardinality;
5231 for (int i = 0; i < origcard; ++i) {
5232 uint16_t key = src_1->array[i];
5233 newcard += bitset_container_contains(bitset: src_2, pos: key);
5234 }
5235 return newcard;
5236}
5237
5238
5239bool array_bitset_container_intersect(const array_container_t *src_1,
5240 const bitset_container_t *src_2) {
5241 const int32_t origcard = src_1->cardinality;
5242 for (int i = 0; i < origcard; ++i) {
5243 uint16_t key = src_1->array[i];
5244 if(bitset_container_contains(bitset: src_2, pos: key)) return true;
5245 }
5246 return false;
5247}
5248
5249/* Compute the intersection of src_1 and src_2 and write the result to
5250 * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
5251 * valid container. */
5252void array_run_container_intersection(const array_container_t *src_1,
5253 const run_container_t *src_2,
5254 array_container_t *dst) {
5255 if (run_container_is_full(run: src_2)) {
5256 if (dst != src_1) array_container_copy(src: src_1, dst);
5257 return;
5258 }
5259 if (dst->capacity < src_1->cardinality) {
5260 array_container_grow(container: dst, min: src_1->cardinality, false);
5261 }
5262 if (src_2->n_runs == 0) {
5263 return;
5264 }
5265 int32_t rlepos = 0;
5266 int32_t arraypos = 0;
5267 rle16_t rle = src_2->runs[rlepos];
5268 int32_t newcard = 0;
5269 while (arraypos < src_1->cardinality) {
5270 const uint16_t arrayval = src_1->array[arraypos];
5271 while (rle.value + rle.length <
5272 arrayval) { // this will frequently be false
5273 ++rlepos;
5274 if (rlepos == src_2->n_runs) {
5275 dst->cardinality = newcard;
5276 return; // we are done
5277 }
5278 rle = src_2->runs[rlepos];
5279 }
5280 if (rle.value > arrayval) {
5281 arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality,
5282 min: rle.value);
5283 } else {
5284 dst->array[newcard] = arrayval;
5285 newcard++;
5286 arraypos++;
5287 }
5288 }
5289 dst->cardinality = newcard;
5290}
5291
5292/* Compute the intersection of src_1 and src_2 and write the result to
5293 * *dst. If the result is true then the result is a bitset_container_t
5294 * otherwise is a array_container_t. If *dst == src_2, an in-place processing
5295 * is attempted.*/
5296bool run_bitset_container_intersection(const run_container_t *src_1,
5297 const bitset_container_t *src_2,
5298 void **dst) {
5299 if (run_container_is_full(run: src_1)) {
5300 if (*dst != src_2) *dst = bitset_container_clone(src: src_2);
5301 return true;
5302 }
5303 int32_t card = run_container_cardinality(run: src_1);
5304 if (card <= DEFAULT_MAX_SIZE) {
5305 // result can only be an array (assuming that we never make a
5306 // RunContainer)
5307 if (card > src_2->cardinality) {
5308 card = src_2->cardinality;
5309 }
5310 array_container_t *answer = array_container_create_given_capacity(size: card);
5311 *dst = answer;
5312 if (*dst == NULL) {
5313 return false;
5314 }
5315 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5316 rle16_t rle = src_1->runs[rlepos];
5317 uint32_t endofrun = (uint32_t)rle.value + rle.length;
5318 for (uint32_t runValue = rle.value; runValue <= endofrun;
5319 ++runValue) {
5320 answer->array[answer->cardinality] = (uint16_t)runValue;
5321 answer->cardinality +=
5322 bitset_container_contains(bitset: src_2, pos: runValue);
5323 }
5324 }
5325 return false;
5326 }
5327 if (*dst == src_2) { // we attempt in-place
5328 bitset_container_t *answer = (bitset_container_t *)*dst;
5329 uint32_t start = 0;
5330 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5331 const rle16_t rle = src_1->runs[rlepos];
5332 uint32_t end = rle.value;
5333 bitset_reset_range(bitmap: src_2->array, start, end);
5334
5335 start = end + rle.length + 1;
5336 }
5337 bitset_reset_range(bitmap: src_2->array, start, UINT32_C(1) << 16);
5338 answer->cardinality = bitset_container_compute_cardinality(bitset: answer);
5339 if (src_2->cardinality > DEFAULT_MAX_SIZE) {
5340 return true;
5341 } else {
5342 array_container_t *newanswer = array_container_from_bitset(bits: src_2);
5343 if (newanswer == NULL) {
5344 *dst = NULL;
5345 return false;
5346 }
5347 *dst = newanswer;
5348 return false;
5349 }
5350 } else { // no inplace
5351 // we expect the answer to be a bitmap (if we are lucky)
5352 bitset_container_t *answer = bitset_container_clone(src: src_2);
5353
5354 *dst = answer;
5355 if (answer == NULL) {
5356 return true;
5357 }
5358 uint32_t start = 0;
5359 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5360 const rle16_t rle = src_1->runs[rlepos];
5361 uint32_t end = rle.value;
5362 bitset_reset_range(bitmap: answer->array, start, end);
5363 start = end + rle.length + 1;
5364 }
5365 bitset_reset_range(bitmap: answer->array, start, UINT32_C(1) << 16);
5366 answer->cardinality = bitset_container_compute_cardinality(bitset: answer);
5367
5368 if (answer->cardinality > DEFAULT_MAX_SIZE) {
5369 return true;
5370 } else {
5371 array_container_t *newanswer = array_container_from_bitset(bits: answer);
5372 bitset_container_free(bitset: (bitset_container_t *)*dst);
5373 if (newanswer == NULL) {
5374 *dst = NULL;
5375 return false;
5376 }
5377 *dst = newanswer;
5378 return false;
5379 }
5380 }
5381}
5382
5383/* Compute the size of the intersection between src_1 and src_2 . */
5384int array_run_container_intersection_cardinality(const array_container_t *src_1,
5385 const run_container_t *src_2) {
5386 if (run_container_is_full(run: src_2)) {
5387 return src_1->cardinality;
5388 }
5389 if (src_2->n_runs == 0) {
5390 return 0;
5391 }
5392 int32_t rlepos = 0;
5393 int32_t arraypos = 0;
5394 rle16_t rle = src_2->runs[rlepos];
5395 int32_t newcard = 0;
5396 while (arraypos < src_1->cardinality) {
5397 const uint16_t arrayval = src_1->array[arraypos];
5398 while (rle.value + rle.length <
5399 arrayval) { // this will frequently be false
5400 ++rlepos;
5401 if (rlepos == src_2->n_runs) {
5402 return newcard; // we are done
5403 }
5404 rle = src_2->runs[rlepos];
5405 }
5406 if (rle.value > arrayval) {
5407 arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality,
5408 min: rle.value);
5409 } else {
5410 newcard++;
5411 arraypos++;
5412 }
5413 }
5414 return newcard;
5415}
5416
5417/* Compute the intersection between src_1 and src_2
5418 **/
5419int run_bitset_container_intersection_cardinality(
5420 const run_container_t *src_1, const bitset_container_t *src_2) {
5421 if (run_container_is_full(run: src_1)) {
5422 return bitset_container_cardinality(bitset: src_2);
5423 }
5424 int answer = 0;
5425 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5426 rle16_t rle = src_1->runs[rlepos];
5427 answer +=
5428 bitset_lenrange_cardinality(bitmap: src_2->array, start: rle.value, lenminusone: rle.length);
5429 }
5430 return answer;
5431}
5432
5433
5434bool array_run_container_intersect(const array_container_t *src_1,
5435 const run_container_t *src_2) {
5436 if( run_container_is_full(run: src_2) ) {
5437 return !array_container_empty(array: src_1);
5438 }
5439 if (src_2->n_runs == 0) {
5440 return false;
5441 }
5442 int32_t rlepos = 0;
5443 int32_t arraypos = 0;
5444 rle16_t rle = src_2->runs[rlepos];
5445 while (arraypos < src_1->cardinality) {
5446 const uint16_t arrayval = src_1->array[arraypos];
5447 while (rle.value + rle.length <
5448 arrayval) { // this will frequently be false
5449 ++rlepos;
5450 if (rlepos == src_2->n_runs) {
5451 return false; // we are done
5452 }
5453 rle = src_2->runs[rlepos];
5454 }
5455 if (rle.value > arrayval) {
5456 arraypos = advanceUntil(array: src_1->array, pos: arraypos, length: src_1->cardinality,
5457 min: rle.value);
5458 } else {
5459 return true;
5460 }
5461 }
5462 return false;
5463}
5464
5465/* Compute the intersection between src_1 and src_2
5466 **/
5467bool run_bitset_container_intersect(const run_container_t *src_1,
5468 const bitset_container_t *src_2) {
5469 if( run_container_is_full(run: src_1) ) {
5470 return !bitset_container_empty(bitset: src_2);
5471 }
5472 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5473 rle16_t rle = src_1->runs[rlepos];
5474 if(!bitset_lenrange_empty(bitmap: src_2->array, start: rle.value,lenminusone: rle.length)) return true;
5475 }
5476 return false;
5477}
5478
5479/*
5480 * Compute the intersection between src_1 and src_2 and write the result
5481 * to *dst. If the return function is true, the result is a bitset_container_t
5482 * otherwise is a array_container_t.
5483 */
5484bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
5485 const bitset_container_t *src_2,
5486 void **dst) {
5487 const int newCardinality = bitset_container_and_justcard(src_1, src_2);
5488 if (newCardinality > DEFAULT_MAX_SIZE) {
5489 *dst = bitset_container_create();
5490 if (*dst != NULL) {
5491 bitset_container_and_nocard(src_1, src_2,
5492 dst: (bitset_container_t *)*dst);
5493 ((bitset_container_t *)*dst)->cardinality = newCardinality;
5494 }
5495 return true; // it is a bitset
5496 }
5497 *dst = array_container_create_given_capacity(size: newCardinality);
5498 if (*dst != NULL) {
5499 ((array_container_t *)*dst)->cardinality = newCardinality;
5500 bitset_extract_intersection_setbits_uint16(
5501 bitset1: ((const bitset_container_t *)src_1)->array,
5502 bitset2: ((const bitset_container_t *)src_2)->array,
5503 length: BITSET_CONTAINER_SIZE_IN_WORDS, out: ((array_container_t *)*dst)->array,
5504 base: 0);
5505 }
5506 return false; // not a bitset
5507}
5508
5509bool bitset_bitset_container_intersection_inplace(
5510 bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) {
5511 const int newCardinality = bitset_container_and_justcard(src_1, src_2);
5512 if (newCardinality > DEFAULT_MAX_SIZE) {
5513 *dst = src_1;
5514 bitset_container_and_nocard(src_1, src_2, dst: src_1);
5515 ((bitset_container_t *)*dst)->cardinality = newCardinality;
5516 return true; // it is a bitset
5517 }
5518 *dst = array_container_create_given_capacity(size: newCardinality);
5519 if (*dst != NULL) {
5520 ((array_container_t *)*dst)->cardinality = newCardinality;
5521 bitset_extract_intersection_setbits_uint16(
5522 bitset1: ((const bitset_container_t *)src_1)->array,
5523 bitset2: ((const bitset_container_t *)src_2)->array,
5524 length: BITSET_CONTAINER_SIZE_IN_WORDS, out: ((array_container_t *)*dst)->array,
5525 base: 0);
5526 }
5527 return false; // not a bitset
5528}
5529/* end file src/containers/mixed_intersection.c */
5530/* begin file src/containers/mixed_negation.c */
5531/*
5532 * mixed_negation.c
5533 *
5534 */
5535
5536#include <assert.h>
5537#include <string.h>
5538
5539
5540// TODO: make simplified and optimized negation code across
5541// the full range.
5542
5543/* Negation across the entire range of the container.
5544 * Compute the negation of src and write the result
5545 * to *dst. The complement of a
5546 * sufficiently sparse set will always be dense and a hence a bitmap
5547' * We assume that dst is pre-allocated and a valid bitset container
5548 * There can be no in-place version.
5549 */
5550void array_container_negation(const array_container_t *src,
5551 bitset_container_t *dst) {
5552 uint64_t card = UINT64_C(1 << 16);
5553 bitset_container_set_all(bitset: dst);
5554
5555 dst->cardinality = (int32_t)bitset_clear_list(bitset: dst->array, card, list: src->array,
5556 length: (uint64_t)src->cardinality);
5557}
5558
5559/* Negation across the entire range of the container
5560 * Compute the negation of src and write the result
5561 * to *dst. A true return value indicates a bitset result,
5562 * otherwise the result is an array container.
5563 * We assume that dst is not pre-allocated. In
5564 * case of failure, *dst will be NULL.
5565 */
5566bool bitset_container_negation(const bitset_container_t *src, void **dst) {
5567 return bitset_container_negation_range(src, range_start: 0, range_end: (1 << 16), dst);
5568}
5569
5570/* inplace version */
5571/*
5572 * Same as bitset_container_negation except that if the output is to
5573 * be a
5574 * bitset_container_t, then src is modified and no allocation is made.
5575 * If the output is to be an array_container_t, then caller is responsible
5576 * to free the container.
5577 * In all cases, the result is in *dst.
5578 */
5579bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) {
5580 return bitset_container_negation_range_inplace(src, range_start: 0, range_end: (1 << 16), dst);
5581}
5582
5583/* Negation across the entire range of container
5584 * Compute the negation of src and write the result
5585 * to *dst. Return values are the *_TYPECODES as defined * in containers.h
5586 * We assume that dst is not pre-allocated. In
5587 * case of failure, *dst will be NULL.
5588 */
5589int run_container_negation(const run_container_t *src, void **dst) {
5590 return run_container_negation_range(src, range_start: 0, range_end: (1 << 16), dst);
5591}
5592
5593/*
5594 * Same as run_container_negation except that if the output is to
5595 * be a
5596 * run_container_t, and has the capacity to hold the result,
5597 * then src is modified and no allocation is made.
5598 * In all cases, the result is in *dst.
5599 */
5600int run_container_negation_inplace(run_container_t *src, void **dst) {
5601 return run_container_negation_range_inplace(src, range_start: 0, range_end: (1 << 16), dst);
5602}
5603
5604/* Negation across a range of the container.
5605 * Compute the negation of src and write the result
5606 * to *dst. Returns true if the result is a bitset container
5607 * and false for an array container. *dst is not preallocated.
5608 */
5609bool array_container_negation_range(const array_container_t *src,
5610 const int range_start, const int range_end,
5611 void **dst) {
5612 /* close port of the Java implementation */
5613 if (range_start >= range_end) {
5614 *dst = array_container_clone(src);
5615 return false;
5616 }
5617
5618 int32_t start_index =
5619 binarySearch(array: src->array, lenarray: src->cardinality, ikey: (uint16_t)range_start);
5620 if (start_index < 0) start_index = -start_index - 1;
5621
5622 int32_t last_index =
5623 binarySearch(array: src->array, lenarray: src->cardinality, ikey: (uint16_t)(range_end - 1));
5624 if (last_index < 0) last_index = -last_index - 2;
5625
5626 const int32_t current_values_in_range = last_index - start_index + 1;
5627 const int32_t span_to_be_flipped = range_end - range_start;
5628 const int32_t new_values_in_range =
5629 span_to_be_flipped - current_values_in_range;
5630 const int32_t cardinality_change =
5631 new_values_in_range - current_values_in_range;
5632 const int32_t new_cardinality = src->cardinality + cardinality_change;
5633
5634 if (new_cardinality > DEFAULT_MAX_SIZE) {
5635 bitset_container_t *temp = bitset_container_from_array(a: src);
5636 bitset_flip_range(bitmap: temp->array, start: (uint32_t)range_start,
5637 end: (uint32_t)range_end);
5638 temp->cardinality = new_cardinality;
5639 *dst = temp;
5640 return true;
5641 }
5642
5643 array_container_t *arr =
5644 array_container_create_given_capacity(size: new_cardinality);
5645 *dst = (void *)arr;
5646 if(new_cardinality == 0) {
5647 arr->cardinality = new_cardinality;
5648 return false; // we are done.
5649 }
5650 // copy stuff before the active area
5651 memcpy(dest: arr->array, src: src->array, n: start_index * sizeof(uint16_t));
5652
5653 // work on the range
5654 int32_t out_pos = start_index, in_pos = start_index;
5655 int32_t val_in_range = range_start;
5656 for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
5657 if ((uint16_t)val_in_range != src->array[in_pos]) {
5658 arr->array[out_pos++] = (uint16_t)val_in_range;
5659 } else {
5660 ++in_pos;
5661 }
5662 }
5663 for (; val_in_range < range_end; ++val_in_range)
5664 arr->array[out_pos++] = (uint16_t)val_in_range;
5665
5666 // content after the active range
5667 memcpy(dest: arr->array + out_pos, src: src->array + (last_index + 1),
5668 n: (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
5669 arr->cardinality = new_cardinality;
5670 return false;
5671}
5672
5673/* Even when the result would fit, it is unclear how to make an
5674 * inplace version without inefficient copying.
5675 */
5676
5677bool array_container_negation_range_inplace(array_container_t *src,
5678 const int range_start,
5679 const int range_end, void **dst) {
5680 bool ans = array_container_negation_range(src, range_start, range_end, dst);
5681 // TODO : try a real inplace version
5682 array_container_free(arr: src);
5683 return ans;
5684}
5685
5686/* Negation across a range of the container
5687 * Compute the negation of src and write the result
5688 * to *dst. A true return value indicates a bitset result,
5689 * otherwise the result is an array container.
5690 * We assume that dst is not pre-allocated. In
5691 * case of failure, *dst will be NULL.
5692 */
5693bool bitset_container_negation_range(const bitset_container_t *src,
5694 const int range_start, const int range_end,
5695 void **dst) {
5696 // TODO maybe consider density-based estimate
5697 // and sometimes build result directly as array, with
5698 // conversion back to bitset if wrong. Or determine
5699 // actual result cardinality, then go directly for the known final cont.
5700
5701 // keep computation using bitsets as long as possible.
5702 bitset_container_t *t = bitset_container_clone(src);
5703 bitset_flip_range(bitmap: t->array, start: (uint32_t)range_start, end: (uint32_t)range_end);
5704 t->cardinality = bitset_container_compute_cardinality(bitset: t);
5705
5706 if (t->cardinality > DEFAULT_MAX_SIZE) {
5707 *dst = t;
5708 return true;
5709 } else {
5710 *dst = array_container_from_bitset(bits: t);
5711 bitset_container_free(bitset: t);
5712 return false;
5713 }
5714}
5715
5716/* inplace version */
5717/*
5718 * Same as bitset_container_negation except that if the output is to
5719 * be a
5720 * bitset_container_t, then src is modified and no allocation is made.
5721 * If the output is to be an array_container_t, then caller is responsible
5722 * to free the container.
5723 * In all cases, the result is in *dst.
5724 */
5725bool bitset_container_negation_range_inplace(bitset_container_t *src,
5726 const int range_start,
5727 const int range_end, void **dst) {
5728 bitset_flip_range(bitmap: src->array, start: (uint32_t)range_start, end: (uint32_t)range_end);
5729 src->cardinality = bitset_container_compute_cardinality(bitset: src);
5730 if (src->cardinality > DEFAULT_MAX_SIZE) {
5731 *dst = src;
5732 return true;
5733 }
5734 *dst = array_container_from_bitset(bits: src);
5735 bitset_container_free(bitset: src);
5736 return false;
5737}
5738
5739/* Negation across a range of container
5740 * Compute the negation of src and write the result
5741 * to *dst. Return values are the *_TYPECODES as defined * in containers.h
5742 * We assume that dst is not pre-allocated. In
5743 * case of failure, *dst will be NULL.
5744 */
5745int run_container_negation_range(const run_container_t *src,
5746 const int range_start, const int range_end,
5747 void **dst) {
5748 uint8_t return_typecode;
5749
5750 // follows the Java implementation
5751 if (range_end <= range_start) {
5752 *dst = run_container_clone(src);
5753 return RUN_CONTAINER_TYPE_CODE;
5754 }
5755
5756 run_container_t *ans = run_container_create_given_capacity(
5757 size: src->n_runs + 1); // src->n_runs + 1);
5758 int k = 0;
5759 for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
5760 ans->runs[k] = src->runs[k];
5761 ans->n_runs++;
5762 }
5763
5764 run_container_smart_append_exclusive(
5765 src: ans, start: (uint16_t)range_start, length: (uint16_t)(range_end - range_start - 1));
5766
5767 for (; k < src->n_runs; ++k) {
5768 run_container_smart_append_exclusive(src: ans, start: src->runs[k].value,
5769 length: src->runs[k].length);
5770 }
5771
5772 *dst = convert_run_to_efficient_container(c: ans, typecode_after: &return_typecode);
5773 if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(run: ans);
5774
5775 return return_typecode;
5776}
5777
5778/*
5779 * Same as run_container_negation except that if the output is to
5780 * be a
5781 * run_container_t, and has the capacity to hold the result,
5782 * then src is modified and no allocation is made.
5783 * In all cases, the result is in *dst.
5784 */
5785int run_container_negation_range_inplace(run_container_t *src,
5786 const int range_start,
5787 const int range_end, void **dst) {
5788 uint8_t return_typecode;
5789
5790 if (range_end <= range_start) {
5791 *dst = src;
5792 return RUN_CONTAINER_TYPE_CODE;
5793 }
5794
5795 // TODO: efficient special case when range is 0 to 65535 inclusive
5796
5797 if (src->capacity == src->n_runs) {
5798 // no excess room. More checking to see if result can fit
5799 bool last_val_before_range = false;
5800 bool first_val_in_range = false;
5801 bool last_val_in_range = false;
5802 bool first_val_past_range = false;
5803
5804 if (range_start > 0)
5805 last_val_before_range =
5806 run_container_contains(run: src, pos: (uint16_t)(range_start - 1));
5807 first_val_in_range = run_container_contains(run: src, pos: (uint16_t)range_start);
5808
5809 if (last_val_before_range == first_val_in_range) {
5810 last_val_in_range =
5811 run_container_contains(run: src, pos: (uint16_t)(range_end - 1));
5812 if (range_end != 0x10000)
5813 first_val_past_range =
5814 run_container_contains(run: src, pos: (uint16_t)range_end);
5815
5816 if (last_val_in_range ==
5817 first_val_past_range) { // no space for inplace
5818 int ans = run_container_negation_range(src, range_start,
5819 range_end, dst);
5820 run_container_free(run: src);
5821 return ans;
5822 }
5823 }
5824 }
5825 // all other cases: result will fit
5826
5827 run_container_t *ans = src;
5828 int my_nbr_runs = src->n_runs;
5829
5830 ans->n_runs = 0;
5831 int k = 0;
5832 for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
5833 // ans->runs[k] = src->runs[k]; (would be self-copy)
5834 ans->n_runs++;
5835 }
5836
5837 // as with Java implementation, use locals to give self a buffer of depth 1
5838 rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
5839 rle16_t next = buffered;
5840 if (k < my_nbr_runs) buffered = src->runs[k];
5841
5842 run_container_smart_append_exclusive(
5843 src: ans, start: (uint16_t)range_start, length: (uint16_t)(range_end - range_start - 1));
5844
5845 for (; k < my_nbr_runs; ++k) {
5846 if (k + 1 < my_nbr_runs) next = src->runs[k + 1];
5847
5848 run_container_smart_append_exclusive(src: ans, start: buffered.value,
5849 length: buffered.length);
5850 buffered = next;
5851 }
5852
5853 *dst = convert_run_to_efficient_container(c: ans, typecode_after: &return_typecode);
5854 if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(run: ans);
5855
5856 return return_typecode;
5857}
5858/* end file src/containers/mixed_negation.c */
5859/* begin file src/containers/mixed_subset.c */
5860
5861bool array_container_is_subset_bitset(const array_container_t* container1,
5862 const bitset_container_t* container2) {
5863 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5864 if (container2->cardinality < container1->cardinality) {
5865 return false;
5866 }
5867 }
5868 for (int i = 0; i < container1->cardinality; ++i) {
5869 if (!bitset_container_contains(bitset: container2, pos: container1->array[i])) {
5870 return false;
5871 }
5872 }
5873 return true;
5874}
5875
5876bool run_container_is_subset_array(const run_container_t* container1,
5877 const array_container_t* container2) {
5878 if (run_container_cardinality(run: container1) > container2->cardinality)
5879 return false;
5880 int32_t start_pos = -1, stop_pos = -1;
5881 for (int i = 0; i < container1->n_runs; ++i) {
5882 int32_t start = container1->runs[i].value;
5883 int32_t stop = start + container1->runs[i].length;
5884 start_pos = advanceUntil(array: container2->array, pos: stop_pos,
5885 length: container2->cardinality, min: start);
5886 stop_pos = advanceUntil(array: container2->array, pos: stop_pos,
5887 length: container2->cardinality, min: stop);
5888 if (start_pos == container2->cardinality) {
5889 return false;
5890 } else if (stop_pos - start_pos != stop - start ||
5891 container2->array[start_pos] != start ||
5892 container2->array[stop_pos] != stop) {
5893 return false;
5894 }
5895 }
5896 return true;
5897}
5898
5899bool array_container_is_subset_run(const array_container_t* container1,
5900 const run_container_t* container2) {
5901 if (container1->cardinality > run_container_cardinality(run: container2))
5902 return false;
5903 int i_array = 0, i_run = 0;
5904 while (i_array < container1->cardinality && i_run < container2->n_runs) {
5905 uint32_t start = container2->runs[i_run].value;
5906 uint32_t stop = start + container2->runs[i_run].length;
5907 if (container1->array[i_array] < start) {
5908 return false;
5909 } else if (container1->array[i_array] > stop) {
5910 i_run++;
5911 } else { // the value of the array is in the run
5912 i_array++;
5913 }
5914 }
5915 if (i_array == container1->cardinality) {
5916 return true;
5917 } else {
5918 return false;
5919 }
5920}
5921
5922bool run_container_is_subset_bitset(const run_container_t* container1,
5923 const bitset_container_t* container2) {
5924 // todo: this code could be much faster
5925 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5926 if (container2->cardinality < run_container_cardinality(run: container1)) {
5927 return false;
5928 }
5929 } else {
5930 int32_t card = bitset_container_compute_cardinality(
5931 bitset: container2); // modify container2?
5932 if (card < run_container_cardinality(run: container1)) {
5933 return false;
5934 }
5935 }
5936 for (int i = 0; i < container1->n_runs; ++i) {
5937 uint32_t run_start = container1->runs[i].value;
5938 uint32_t le = container1->runs[i].length;
5939 for (uint32_t j = run_start; j <= run_start + le; ++j) {
5940 if (!bitset_container_contains(bitset: container2, pos: j)) {
5941 return false;
5942 }
5943 }
5944 }
5945 return true;
5946}
5947
5948bool bitset_container_is_subset_run(const bitset_container_t* container1,
5949 const run_container_t* container2) {
5950 // todo: this code could be much faster
5951 if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5952 if (container1->cardinality > run_container_cardinality(run: container2)) {
5953 return false;
5954 }
5955 }
5956 int32_t i_bitset = 0, i_run = 0;
5957 while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS &&
5958 i_run < container2->n_runs) {
5959 uint64_t w = container1->array[i_bitset];
5960 while (w != 0 && i_run < container2->n_runs) {
5961 uint32_t start = container2->runs[i_run].value;
5962 uint32_t stop = start + container2->runs[i_run].length;
5963 uint64_t t = w & (~w + 1);
5964 uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
5965 if (r < start) {
5966 return false;
5967 } else if (r > stop) {
5968 i_run++;
5969 continue;
5970 } else {
5971 w ^= t;
5972 }
5973 }
5974 if (w == 0) {
5975 i_bitset++;
5976 } else {
5977 return false;
5978 }
5979 }
5980 if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) {
5981 // terminated iterating on the run containers, check that rest of bitset
5982 // is empty
5983 for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) {
5984 if (container1->array[i_bitset] != 0) {
5985 return false;
5986 }
5987 }
5988 }
5989 return true;
5990}
5991/* end file src/containers/mixed_subset.c */
5992/* begin file src/containers/mixed_union.c */
5993/*
5994 * mixed_union.c
5995 *
5996 */
5997
5998#include <assert.h>
5999#include <string.h>
6000
6001
6002/* Compute the union of src_1 and src_2 and write the result to
6003 * dst. */
6004void array_bitset_container_union(const array_container_t *src_1,
6005 const bitset_container_t *src_2,
6006 bitset_container_t *dst) {
6007 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6008 dst->cardinality = (int32_t)bitset_set_list_withcard(
6009 bitset: dst->array, card: dst->cardinality, list: src_1->array, length: src_1->cardinality);
6010}
6011
6012/* Compute the union of src_1 and src_2 and write the result to
6013 * dst. It is allowed for src_2 to be dst. This version does not
6014 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
6015void array_bitset_container_lazy_union(const array_container_t *src_1,
6016 const bitset_container_t *src_2,
6017 bitset_container_t *dst) {
6018 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6019 bitset_set_list(bitset: dst->array, list: src_1->array, length: src_1->cardinality);
6020 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6021}
6022
6023void run_bitset_container_union(const run_container_t *src_1,
6024 const bitset_container_t *src_2,
6025 bitset_container_t *dst) {
6026 assert(!run_container_is_full(src_1)); // catch this case upstream
6027 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6028 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6029 rle16_t rle = src_1->runs[rlepos];
6030 bitset_set_lenrange(bitmap: dst->array, start: rle.value, lenminusone: rle.length);
6031 }
6032 dst->cardinality = bitset_container_compute_cardinality(bitset: dst);
6033}
6034
6035void run_bitset_container_lazy_union(const run_container_t *src_1,
6036 const bitset_container_t *src_2,
6037 bitset_container_t *dst) {
6038 assert(!run_container_is_full(src_1)); // catch this case upstream
6039 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6040 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6041 rle16_t rle = src_1->runs[rlepos];
6042 bitset_set_lenrange(bitmap: dst->array, start: rle.value, lenminusone: rle.length);
6043 }
6044 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6045}
6046
6047// why do we leave the result as a run container??
6048void array_run_container_union(const array_container_t *src_1,
6049 const run_container_t *src_2,
6050 run_container_t *dst) {
6051 if (run_container_is_full(run: src_2)) {
6052 run_container_copy(src: src_2, dst);
6053 return;
6054 }
6055 // TODO: see whether the "2*" is spurious
6056 run_container_grow(run: dst, min: 2 * (src_1->cardinality + src_2->n_runs), false);
6057 int32_t rlepos = 0;
6058 int32_t arraypos = 0;
6059 rle16_t previousrle;
6060 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
6061 previousrle = run_container_append_first(run: dst, vl: src_2->runs[rlepos]);
6062 rlepos++;
6063 } else {
6064 previousrle =
6065 run_container_append_value_first(run: dst, val: src_1->array[arraypos]);
6066 arraypos++;
6067 }
6068 while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
6069 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
6070 run_container_append(run: dst, vl: src_2->runs[rlepos], previousrl: &previousrle);
6071 rlepos++;
6072 } else {
6073 run_container_append_value(run: dst, val: src_1->array[arraypos],
6074 previousrl: &previousrle);
6075 arraypos++;
6076 }
6077 }
6078 if (arraypos < src_1->cardinality) {
6079 while (arraypos < src_1->cardinality) {
6080 run_container_append_value(run: dst, val: src_1->array[arraypos],
6081 previousrl: &previousrle);
6082 arraypos++;
6083 }
6084 } else {
6085 while (rlepos < src_2->n_runs) {
6086 run_container_append(run: dst, vl: src_2->runs[rlepos], previousrl: &previousrle);
6087 rlepos++;
6088 }
6089 }
6090}
6091
6092void array_run_container_inplace_union(const array_container_t *src_1,
6093 run_container_t *src_2) {
6094 if (run_container_is_full(run: src_2)) {
6095 return;
6096 }
6097 const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
6098 const int32_t neededcapacity = maxoutput + src_2->n_runs;
6099 if (src_2->capacity < neededcapacity)
6100 run_container_grow(run: src_2, min: neededcapacity, true);
6101 memmove(dest: src_2->runs + maxoutput, src: src_2->runs,
6102 n: src_2->n_runs * sizeof(rle16_t));
6103 rle16_t *inputsrc2 = src_2->runs + maxoutput;
6104 int32_t rlepos = 0;
6105 int32_t arraypos = 0;
6106 int src2nruns = src_2->n_runs;
6107 src_2->n_runs = 0;
6108
6109 rle16_t previousrle;
6110
6111 if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
6112 previousrle = run_container_append_first(run: src_2, vl: inputsrc2[rlepos]);
6113 rlepos++;
6114 } else {
6115 previousrle =
6116 run_container_append_value_first(run: src_2, val: src_1->array[arraypos]);
6117 arraypos++;
6118 }
6119
6120 while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
6121 if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
6122 run_container_append(run: src_2, vl: inputsrc2[rlepos], previousrl: &previousrle);
6123 rlepos++;
6124 } else {
6125 run_container_append_value(run: src_2, val: src_1->array[arraypos],
6126 previousrl: &previousrle);
6127 arraypos++;
6128 }
6129 }
6130 if (arraypos < src_1->cardinality) {
6131 while (arraypos < src_1->cardinality) {
6132 run_container_append_value(run: src_2, val: src_1->array[arraypos],
6133 previousrl: &previousrle);
6134 arraypos++;
6135 }
6136 } else {
6137 while (rlepos < src2nruns) {
6138 run_container_append(run: src_2, vl: inputsrc2[rlepos], previousrl: &previousrle);
6139 rlepos++;
6140 }
6141 }
6142}
6143
6144bool array_array_container_union(const array_container_t *src_1,
6145 const array_container_t *src_2, void **dst) {
6146 int totalCardinality = src_1->cardinality + src_2->cardinality;
6147 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6148 *dst = array_container_create_given_capacity(size: totalCardinality);
6149 if (*dst != NULL) {
6150 array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6151 } else {
6152 return true; // otherwise failure won't be caught
6153 }
6154 return false; // not a bitset
6155 }
6156 *dst = bitset_container_create();
6157 bool returnval = true; // expect a bitset
6158 if (*dst != NULL) {
6159 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6160 bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality);
6161 ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
6162 bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array,
6163 length: src_2->cardinality);
6164 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6165 // need to convert!
6166 *dst = array_container_from_bitset(bits: ourbitset);
6167 bitset_container_free(bitset: ourbitset);
6168 returnval = false; // not going to be a bitset
6169 }
6170 }
6171 return returnval;
6172}
6173
6174bool array_array_container_inplace_union(array_container_t *src_1,
6175 const array_container_t *src_2, void **dst) {
6176 int totalCardinality = src_1->cardinality + src_2->cardinality;
6177 *dst = NULL;
6178 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6179 if(src_1->capacity < totalCardinality) {
6180 *dst = array_container_create_given_capacity(size: 2 * totalCardinality); // be purposefully generous
6181 if (*dst != NULL) {
6182 array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6183 } else {
6184 return true; // otherwise failure won't be caught
6185 }
6186 return false; // not a bitset
6187 } else {
6188 memmove(dest: src_1->array + src_2->cardinality, src: src_1->array, n: src_1->cardinality * sizeof(uint16_t));
6189 src_1->cardinality = (int32_t)union_uint16(set_1: src_1->array + src_2->cardinality, size_1: src_1->cardinality,
6190 set_2: src_2->array, size_2: src_2->cardinality, buffer: src_1->array);
6191 return false; // not a bitset
6192 }
6193 }
6194 *dst = bitset_container_create();
6195 bool returnval = true; // expect a bitset
6196 if (*dst != NULL) {
6197 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6198 bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality);
6199 ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
6200 bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array,
6201 length: src_2->cardinality);
6202 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6203 // need to convert!
6204 if(src_1->capacity < ourbitset->cardinality) {
6205 array_container_grow(container: src_1, min: ourbitset->cardinality, false);
6206 }
6207
6208 bitset_extract_setbits_uint16(bitset: ourbitset->array, length: BITSET_CONTAINER_SIZE_IN_WORDS,
6209 out: src_1->array, base: 0);
6210 src_1->cardinality = ourbitset->cardinality;
6211 *dst = src_1;
6212 bitset_container_free(bitset: ourbitset);
6213 returnval = false; // not going to be a bitset
6214 }
6215 }
6216 return returnval;
6217}
6218
6219
6220bool array_array_container_lazy_union(const array_container_t *src_1,
6221 const array_container_t *src_2,
6222 void **dst) {
6223 int totalCardinality = src_1->cardinality + src_2->cardinality;
6224 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6225 *dst = array_container_create_given_capacity(size: totalCardinality);
6226 if (*dst != NULL) {
6227 array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6228 } else {
6229 return true; // otherwise failure won't be caught
6230 }
6231 return false; // not a bitset
6232 }
6233 *dst = bitset_container_create();
6234 bool returnval = true; // expect a bitset
6235 if (*dst != NULL) {
6236 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6237 bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality);
6238 bitset_set_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality);
6239 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6240 }
6241 return returnval;
6242}
6243
6244
6245bool array_array_container_lazy_inplace_union(array_container_t *src_1,
6246 const array_container_t *src_2,
6247 void **dst) {
6248 int totalCardinality = src_1->cardinality + src_2->cardinality;
6249 *dst = NULL;
6250 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6251 if(src_1->capacity < totalCardinality) {
6252 *dst = array_container_create_given_capacity(size: 2 * totalCardinality); // be purposefully generous
6253 if (*dst != NULL) {
6254 array_container_union(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6255 } else {
6256 return true; // otherwise failure won't be caught
6257 }
6258 return false; // not a bitset
6259 } else {
6260 memmove(dest: src_1->array + src_2->cardinality, src: src_1->array, n: src_1->cardinality * sizeof(uint16_t));
6261 src_1->cardinality = (int32_t)union_uint16(set_1: src_1->array + src_2->cardinality, size_1: src_1->cardinality,
6262 set_2: src_2->array, size_2: src_2->cardinality, buffer: src_1->array);
6263 return false; // not a bitset
6264 }
6265 }
6266 *dst = bitset_container_create();
6267 bool returnval = true; // expect a bitset
6268 if (*dst != NULL) {
6269 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6270 bitset_set_list(bitset: ourbitset->array, list: src_1->array, length: src_1->cardinality);
6271 bitset_set_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality);
6272 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6273 }
6274 return returnval;
6275}
6276/* end file src/containers/mixed_union.c */
6277/* begin file src/containers/mixed_xor.c */
6278/*
6279 * mixed_xor.c
6280 */
6281
6282#include <assert.h>
6283#include <string.h>
6284
6285
6286/* Compute the xor of src_1 and src_2 and write the result to
6287 * dst (which has no container initially).
6288 * Result is true iff dst is a bitset */
6289bool array_bitset_container_xor(const array_container_t *src_1,
6290 const bitset_container_t *src_2, void **dst) {
6291 bitset_container_t *result = bitset_container_create();
6292 bitset_container_copy(source: src_2, dest: result);
6293 result->cardinality = (int32_t)bitset_flip_list_withcard(
6294 bitset: result->array, card: result->cardinality, list: src_1->array, length: src_1->cardinality);
6295
6296 // do required type conversions.
6297 if (result->cardinality <= DEFAULT_MAX_SIZE) {
6298 *dst = array_container_from_bitset(bits: result);
6299 bitset_container_free(bitset: result);
6300 return false; // not bitset
6301 }
6302 *dst = result;
6303 return true; // bitset
6304}
6305
6306/* Compute the xor of src_1 and src_2 and write the result to
6307 * dst. It is allowed for src_2 to be dst. This version does not
6308 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
6309 */
6310
6311void array_bitset_container_lazy_xor(const array_container_t *src_1,
6312 const bitset_container_t *src_2,
6313 bitset_container_t *dst) {
6314 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6315 bitset_flip_list(bitset: dst->array, list: src_1->array, length: src_1->cardinality);
6316 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6317}
6318
6319/* Compute the xor of src_1 and src_2 and write the result to
6320 * dst. Result may be either a bitset or an array container
6321 * (returns "result is bitset"). dst does not initially have
6322 * any container, but becomes either a bitset container (return
6323 * result true) or an array container.
6324 */
6325
6326bool run_bitset_container_xor(const run_container_t *src_1,
6327 const bitset_container_t *src_2, void **dst) {
6328 bitset_container_t *result = bitset_container_create();
6329
6330 bitset_container_copy(source: src_2, dest: result);
6331 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6332 rle16_t rle = src_1->runs[rlepos];
6333 bitset_flip_range(bitmap: result->array, start: rle.value,
6334 end: rle.value + rle.length + UINT32_C(1));
6335 }
6336 result->cardinality = bitset_container_compute_cardinality(bitset: result);
6337
6338 if (result->cardinality <= DEFAULT_MAX_SIZE) {
6339 *dst = array_container_from_bitset(bits: result);
6340 bitset_container_free(bitset: result);
6341 return false; // not bitset
6342 }
6343 *dst = result;
6344 return true; // bitset
6345}
6346
6347/* lazy xor. Dst is initialized and may be equal to src_2.
6348 * Result is left as a bitset container, even if actual
6349 * cardinality would dictate an array container.
6350 */
6351
6352void run_bitset_container_lazy_xor(const run_container_t *src_1,
6353 const bitset_container_t *src_2,
6354 bitset_container_t *dst) {
6355 if (src_2 != dst) bitset_container_copy(source: src_2, dest: dst);
6356 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6357 rle16_t rle = src_1->runs[rlepos];
6358 bitset_flip_range(bitmap: dst->array, start: rle.value,
6359 end: rle.value + rle.length + UINT32_C(1));
6360 }
6361 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6362}
6363
6364/* dst does not indicate a valid container initially. Eventually it
6365 * can become any kind of container.
6366 */
6367
6368int array_run_container_xor(const array_container_t *src_1,
6369 const run_container_t *src_2, void **dst) {
6370 // semi following Java XOR implementation as of May 2016
6371 // the C OR implementation works quite differently and can return a run
6372 // container
6373 // TODO could optimize for full run containers.
6374
6375 // use of lazy following Java impl.
6376 const int arbitrary_threshold = 32;
6377 if (src_1->cardinality < arbitrary_threshold) {
6378 run_container_t *ans = run_container_create();
6379 array_run_container_lazy_xor(src_1, src_2, dst: ans); // keeps runs.
6380 uint8_t typecode_after;
6381 *dst =
6382 convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after);
6383 return typecode_after;
6384 }
6385
6386 int card = run_container_cardinality(run: src_2);
6387 if (card <= DEFAULT_MAX_SIZE) {
6388 // Java implementation works with the array, xoring the run elements via
6389 // iterator
6390 array_container_t *temp = array_container_from_run(arr: src_2);
6391 bool ret_is_bitset = array_array_container_xor(src_1: temp, src_2: src_1, dst);
6392 array_container_free(arr: temp);
6393 return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE
6394 : ARRAY_CONTAINER_TYPE_CODE;
6395
6396 } else { // guess that it will end up as a bitset
6397 bitset_container_t *result = bitset_container_from_run(arr: src_2);
6398 bool is_bitset = bitset_array_container_ixor(src_1: result, src_2: src_1, dst);
6399 // any necessary type conversion has been done by the ixor
6400 int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE
6401 : ARRAY_CONTAINER_TYPE_CODE);
6402 return retval;
6403 }
6404}
6405
6406/* Dst is a valid run container. (Can it be src_2? Let's say not.)
6407 * Leaves result as run container, even if other options are
6408 * smaller.
6409 */
6410
6411void array_run_container_lazy_xor(const array_container_t *src_1,
6412 const run_container_t *src_2,
6413 run_container_t *dst) {
6414 run_container_grow(run: dst, min: src_1->cardinality + src_2->n_runs, false);
6415 int32_t rlepos = 0;
6416 int32_t arraypos = 0;
6417 dst->n_runs = 0;
6418
6419 while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
6420 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
6421 run_container_smart_append_exclusive(src: dst, start: src_2->runs[rlepos].value,
6422 length: src_2->runs[rlepos].length);
6423 rlepos++;
6424 } else {
6425 run_container_smart_append_exclusive(src: dst, start: src_1->array[arraypos],
6426 length: 0);
6427 arraypos++;
6428 }
6429 }
6430 while (arraypos < src_1->cardinality) {
6431 run_container_smart_append_exclusive(src: dst, start: src_1->array[arraypos], length: 0);
6432 arraypos++;
6433 }
6434 while (rlepos < src_2->n_runs) {
6435 run_container_smart_append_exclusive(src: dst, start: src_2->runs[rlepos].value,
6436 length: src_2->runs[rlepos].length);
6437 rlepos++;
6438 }
6439}
6440
6441/* dst does not indicate a valid container initially. Eventually it
6442 * can become any kind of container.
6443 */
6444
6445int run_run_container_xor(const run_container_t *src_1,
6446 const run_container_t *src_2, void **dst) {
6447 run_container_t *ans = run_container_create();
6448 run_container_xor(src_1, src_2, dst: ans);
6449 uint8_t typecode_after;
6450 *dst = convert_run_to_efficient_container_and_free(c: ans, typecode_after: &typecode_after);
6451 return typecode_after;
6452}
6453
6454/*
6455 * Java implementation (as of May 2016) for array_run, run_run
6456 * and bitset_run don't do anything different for inplace.
6457 * Could adopt the mixed_union.c approach instead (ie, using
6458 * smart_append_exclusive)
6459 *
6460 */
6461
6462bool array_array_container_xor(const array_container_t *src_1,
6463 const array_container_t *src_2, void **dst) {
6464 int totalCardinality =
6465 src_1->cardinality + src_2->cardinality; // upper bound
6466 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6467 *dst = array_container_create_given_capacity(size: totalCardinality);
6468 array_container_xor(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6469 return false; // not a bitset
6470 }
6471 *dst = bitset_container_from_array(a: src_1);
6472 bool returnval = true; // expect a bitset
6473 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6474 ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard(
6475 bitset: ourbitset->array, card: src_1->cardinality, list: src_2->array, length: src_2->cardinality);
6476 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6477 // need to convert!
6478 *dst = array_container_from_bitset(bits: ourbitset);
6479 bitset_container_free(bitset: ourbitset);
6480 returnval = false; // not going to be a bitset
6481 }
6482
6483 return returnval;
6484}
6485
6486bool array_array_container_lazy_xor(const array_container_t *src_1,
6487 const array_container_t *src_2,
6488 void **dst) {
6489 int totalCardinality = src_1->cardinality + src_2->cardinality;
6490 // upper bound, but probably poor estimate for xor
6491 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6492 *dst = array_container_create_given_capacity(size: totalCardinality);
6493 if (*dst != NULL)
6494 array_container_xor(array_1: src_1, array_2: src_2, out: (array_container_t *)*dst);
6495 return false; // not a bitset
6496 }
6497 *dst = bitset_container_from_array(a: src_1);
6498 bool returnval = true; // expect a bitset (maybe, for XOR??)
6499 if (*dst != NULL) {
6500 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6501 bitset_flip_list(bitset: ourbitset->array, list: src_2->array, length: src_2->cardinality);
6502 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6503 }
6504 return returnval;
6505}
6506
6507/* Compute the xor of src_1 and src_2 and write the result to
6508 * dst (which has no container initially). Return value is
6509 * "dst is a bitset"
6510 */
6511
6512bool bitset_bitset_container_xor(const bitset_container_t *src_1,
6513 const bitset_container_t *src_2, void **dst) {
6514 bitset_container_t *ans = bitset_container_create();
6515 int card = bitset_container_xor(src_1, src_2, dst: ans);
6516 if (card <= DEFAULT_MAX_SIZE) {
6517 *dst = array_container_from_bitset(bits: ans);
6518 bitset_container_free(bitset: ans);
6519 return false; // not bitset
6520 } else {
6521 *dst = ans;
6522 return true;
6523 }
6524}
6525
6526/* Compute the xor of src_1 and src_2 and write the result to
6527 * dst (which has no container initially). It will modify src_1
6528 * to be dst if the result is a bitset. Otherwise, it will
6529 * free src_1 and dst will be a new array container. In both
6530 * cases, the caller is responsible for deallocating dst.
6531 * Returns true iff dst is a bitset */
6532
6533bool bitset_array_container_ixor(bitset_container_t *src_1,
6534 const array_container_t *src_2, void **dst) {
6535 *dst = src_1;
6536 src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
6537 bitset: src_1->array, card: src_1->cardinality, list: src_2->array, length: src_2->cardinality);
6538
6539 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
6540 *dst = array_container_from_bitset(bits: src_1);
6541 bitset_container_free(bitset: src_1);
6542 return false; // not bitset
6543 } else
6544 return true;
6545}
6546
6547/* a bunch of in-place, some of which may not *really* be inplace.
6548 * TODO: write actual inplace routine if efficiency warrants it
6549 * Anything inplace with a bitset is a good candidate
6550 */
6551
6552bool bitset_bitset_container_ixor(bitset_container_t *src_1,
6553 const bitset_container_t *src_2, void **dst) {
6554 bool ans = bitset_bitset_container_xor(src_1, src_2, dst);
6555 bitset_container_free(bitset: src_1);
6556 return ans;
6557}
6558
6559bool array_bitset_container_ixor(array_container_t *src_1,
6560 const bitset_container_t *src_2, void **dst) {
6561 bool ans = array_bitset_container_xor(src_1, src_2, dst);
6562 array_container_free(arr: src_1);
6563 return ans;
6564}
6565
6566/* Compute the xor of src_1 and src_2 and write the result to
6567 * dst. Result may be either a bitset or an array container
6568 * (returns "result is bitset"). dst does not initially have
6569 * any container, but becomes either a bitset container (return
6570 * result true) or an array container.
6571 */
6572
6573bool run_bitset_container_ixor(run_container_t *src_1,
6574 const bitset_container_t *src_2, void **dst) {
6575 bool ans = run_bitset_container_xor(src_1, src_2, dst);
6576 run_container_free(run: src_1);
6577 return ans;
6578}
6579
6580bool bitset_run_container_ixor(bitset_container_t *src_1,
6581 const run_container_t *src_2, void **dst) {
6582 bool ans = run_bitset_container_xor(src_1: src_2, src_2: src_1, dst);
6583 bitset_container_free(bitset: src_1);
6584 return ans;
6585}
6586
6587/* dst does not indicate a valid container initially. Eventually it
6588 * can become any kind of container.
6589 */
6590
6591int array_run_container_ixor(array_container_t *src_1,
6592 const run_container_t *src_2, void **dst) {
6593 int ans = array_run_container_xor(src_1, src_2, dst);
6594 array_container_free(arr: src_1);
6595 return ans;
6596}
6597
6598int run_array_container_ixor(run_container_t *src_1,
6599 const array_container_t *src_2, void **dst) {
6600 int ans = array_run_container_xor(src_1: src_2, src_2: src_1, dst);
6601 run_container_free(run: src_1);
6602 return ans;
6603}
6604
6605bool array_array_container_ixor(array_container_t *src_1,
6606 const array_container_t *src_2, void **dst) {
6607 bool ans = array_array_container_xor(src_1, src_2, dst);
6608 array_container_free(arr: src_1);
6609 return ans;
6610}
6611
6612int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
6613 void **dst) {
6614 int ans = run_run_container_xor(src_1, src_2, dst);
6615 run_container_free(run: src_1);
6616 return ans;
6617}
6618/* end file src/containers/mixed_xor.c */
6619/* begin file src/containers/run.c */
6620#include <stdio.h>
6621#include <stdlib.h>
6622
6623
6624bool run_container_add(run_container_t *run, uint16_t pos) {
6625 int32_t index = interleavedBinarySearch(array: run->runs, lenarray: run->n_runs, ikey: pos);
6626 if (index >= 0) return false; // already there
6627 index = -index - 2; // points to preceding value, possibly -1
6628 if (index >= 0) { // possible match
6629 int32_t offset = pos - run->runs[index].value;
6630 int32_t le = run->runs[index].length;
6631 if (offset <= le) return false; // already there
6632 if (offset == le + 1) {
6633 // we may need to fuse
6634 if (index + 1 < run->n_runs) {
6635 if (run->runs[index + 1].value == pos + 1) {
6636 // indeed fusion is needed
6637 run->runs[index].length = run->runs[index + 1].value +
6638 run->runs[index + 1].length -
6639 run->runs[index].value;
6640 recoverRoomAtIndex(run, index: (uint16_t)(index + 1));
6641 return true;
6642 }
6643 }
6644 run->runs[index].length++;
6645 return true;
6646 }
6647 if (index + 1 < run->n_runs) {
6648 // we may need to fuse
6649 if (run->runs[index + 1].value == pos + 1) {
6650 // indeed fusion is needed
6651 run->runs[index + 1].value = pos;
6652 run->runs[index + 1].length = run->runs[index + 1].length + 1;
6653 return true;
6654 }
6655 }
6656 }
6657 if (index == -1) {
6658 // we may need to extend the first run
6659 if (0 < run->n_runs) {
6660 if (run->runs[0].value == pos + 1) {
6661 run->runs[0].length++;
6662 run->runs[0].value--;
6663 return true;
6664 }
6665 }
6666 }
6667 makeRoomAtIndex(run, index: (uint16_t)(index + 1));
6668 run->runs[index + 1].value = pos;
6669 run->runs[index + 1].length = 0;
6670 return true;
6671}
6672
6673/* Create a new run container. Return NULL in case of failure. */
6674run_container_t *run_container_create_given_capacity(int32_t size) {
6675 run_container_t *run;
6676 /* Allocate the run container itself. */
6677 run = (run_container_t *)malloc(size: sizeof(run_container_t));
6678 assert (run);
6679 if (size <= 0) // we don't want to rely on malloc(0)
6680 run->runs = NULL;
6681 run->runs = (rle16_t *)malloc(size: sizeof(rle16_t) * size);
6682 assert (run->runs);
6683 run->capacity = size;
6684 run->n_runs = 0;
6685 return run;
6686}
6687
6688int run_container_shrink_to_fit(run_container_t *src) {
6689 if (src->n_runs == src->capacity) return 0; // nothing to do
6690 int savings = src->capacity - src->n_runs;
6691 src->capacity = src->n_runs;
6692 rle16_t *oldruns = src->runs;
6693 src->runs = (rle16_t *)realloc(ptr: oldruns, size: src->capacity * sizeof(rle16_t));
6694 if (src->runs == NULL) free(ptr: oldruns); // should never happen?
6695 return savings;
6696}
6697/* Create a new run container. Return NULL in case of failure. */
6698run_container_t *run_container_create(void) {
6699 return run_container_create_given_capacity(size: RUN_DEFAULT_INIT_SIZE);
6700}
6701
6702run_container_t *run_container_clone(const run_container_t *src) {
6703 run_container_t *run = run_container_create_given_capacity(size: src->capacity);
6704 if (run == NULL) return NULL;
6705 run->capacity = src->capacity;
6706 run->n_runs = src->n_runs;
6707 memcpy(dest: run->runs, src: src->runs, n: src->n_runs * sizeof(rle16_t));
6708 return run;
6709}
6710
6711/* Free memory. */
6712void run_container_free(run_container_t *run) {
6713 if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
6714 free(ptr: run->runs);
6715 run->runs = NULL; // pedantic
6716 }
6717 free(ptr: run);
6718}
6719
6720void run_container_grow(run_container_t *run, int32_t min, bool copy) {
6721 int32_t newCapacity =
6722 (run->capacity == 0)
6723 ? RUN_DEFAULT_INIT_SIZE
6724 : run->capacity < 64 ? run->capacity * 2
6725 : run->capacity < 1024 ? run->capacity * 3 / 2
6726 : run->capacity * 5 / 4;
6727 if (newCapacity < min) newCapacity = min;
6728 run->capacity = newCapacity;
6729 assert(run->capacity >= min);
6730 if (copy) {
6731 rle16_t *oldruns = run->runs;
6732 run->runs =
6733 (rle16_t *)realloc(ptr: oldruns, size: run->capacity * sizeof(rle16_t));
6734 if (run->runs == NULL) free(ptr: oldruns);
6735 } else {
6736 // Jon Strabala reports that some tools complain otherwise
6737 if (run->runs != NULL) {
6738 free(ptr: run->runs);
6739 }
6740 run->runs = (rle16_t *)malloc(size: run->capacity * sizeof(rle16_t));
6741 }
6742 // handle the case where realloc fails
6743 if (run->runs == NULL) {
6744 fprintf(stderr, format: "could not allocate memory\n");
6745 }
6746 assert(run->runs != NULL);
6747}
6748
6749/* copy one container into another */
6750void run_container_copy(const run_container_t *src, run_container_t *dst) {
6751 const int32_t n_runs = src->n_runs;
6752 if (src->n_runs > dst->capacity) {
6753 run_container_grow(run: dst, min: n_runs, false);
6754 }
6755 dst->n_runs = n_runs;
6756 memcpy(dest: dst->runs, src: src->runs, n: sizeof(rle16_t) * n_runs);
6757}
6758
6759/* Compute the union of `src_1' and `src_2' and write the result to `dst'
6760 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
6761void run_container_union(const run_container_t *src_1,
6762 const run_container_t *src_2, run_container_t *dst) {
6763 // TODO: this could be a lot more efficient
6764
6765 // we start out with inexpensive checks
6766 const bool if1 = run_container_is_full(run: src_1);
6767 const bool if2 = run_container_is_full(run: src_2);
6768 if (if1 || if2) {
6769 if (if1) {
6770 run_container_copy(src: src_1, dst);
6771 return;
6772 }
6773 if (if2) {
6774 run_container_copy(src: src_2, dst);
6775 return;
6776 }
6777 }
6778 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6779 if (dst->capacity < neededcapacity)
6780 run_container_grow(run: dst, min: neededcapacity, false);
6781 dst->n_runs = 0;
6782 int32_t rlepos = 0;
6783 int32_t xrlepos = 0;
6784
6785 rle16_t previousrle;
6786 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
6787 previousrle = run_container_append_first(run: dst, vl: src_1->runs[rlepos]);
6788 rlepos++;
6789 } else {
6790 previousrle = run_container_append_first(run: dst, vl: src_2->runs[xrlepos]);
6791 xrlepos++;
6792 }
6793
6794 while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
6795 rle16_t newrl;
6796 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
6797 newrl = src_1->runs[rlepos];
6798 rlepos++;
6799 } else {
6800 newrl = src_2->runs[xrlepos];
6801 xrlepos++;
6802 }
6803 run_container_append(run: dst, vl: newrl, previousrl: &previousrle);
6804 }
6805 while (xrlepos < src_2->n_runs) {
6806 run_container_append(run: dst, vl: src_2->runs[xrlepos], previousrl: &previousrle);
6807 xrlepos++;
6808 }
6809 while (rlepos < src_1->n_runs) {
6810 run_container_append(run: dst, vl: src_1->runs[rlepos], previousrl: &previousrle);
6811 rlepos++;
6812 }
6813}
6814
6815/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
6816 */
6817void run_container_union_inplace(run_container_t *src_1,
6818 const run_container_t *src_2) {
6819 // TODO: this could be a lot more efficient
6820
6821 // we start out with inexpensive checks
6822 const bool if1 = run_container_is_full(run: src_1);
6823 const bool if2 = run_container_is_full(run: src_2);
6824 if (if1 || if2) {
6825 if (if1) {
6826 return;
6827 }
6828 if (if2) {
6829 run_container_copy(src: src_2, dst: src_1);
6830 return;
6831 }
6832 }
6833 // we move the data to the end of the current array
6834 const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
6835 const int32_t neededcapacity = maxoutput + src_1->n_runs;
6836 if (src_1->capacity < neededcapacity)
6837 run_container_grow(run: src_1, min: neededcapacity, true);
6838 memmove(dest: src_1->runs + maxoutput, src: src_1->runs,
6839 n: src_1->n_runs * sizeof(rle16_t));
6840 rle16_t *inputsrc1 = src_1->runs + maxoutput;
6841 const int32_t input1nruns = src_1->n_runs;
6842 src_1->n_runs = 0;
6843 int32_t rlepos = 0;
6844 int32_t xrlepos = 0;
6845
6846 rle16_t previousrle;
6847 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
6848 previousrle = run_container_append_first(run: src_1, vl: inputsrc1[rlepos]);
6849 rlepos++;
6850 } else {
6851 previousrle = run_container_append_first(run: src_1, vl: src_2->runs[xrlepos]);
6852 xrlepos++;
6853 }
6854 while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
6855 rle16_t newrl;
6856 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
6857 newrl = inputsrc1[rlepos];
6858 rlepos++;
6859 } else {
6860 newrl = src_2->runs[xrlepos];
6861 xrlepos++;
6862 }
6863 run_container_append(run: src_1, vl: newrl, previousrl: &previousrle);
6864 }
6865 while (xrlepos < src_2->n_runs) {
6866 run_container_append(run: src_1, vl: src_2->runs[xrlepos], previousrl: &previousrle);
6867 xrlepos++;
6868 }
6869 while (rlepos < input1nruns) {
6870 run_container_append(run: src_1, vl: inputsrc1[rlepos], previousrl: &previousrle);
6871 rlepos++;
6872 }
6873}
6874
6875/* Compute the symmetric difference of `src_1' and `src_2' and write the result
6876 * to `dst'
6877 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
6878void run_container_xor(const run_container_t *src_1,
6879 const run_container_t *src_2, run_container_t *dst) {
6880 // don't bother to convert xor with full range into negation
6881 // since negation is implemented similarly
6882
6883 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6884 if (dst->capacity < neededcapacity)
6885 run_container_grow(run: dst, min: neededcapacity, false);
6886
6887 int32_t pos1 = 0;
6888 int32_t pos2 = 0;
6889 dst->n_runs = 0;
6890
6891 while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
6892 if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
6893 run_container_smart_append_exclusive(src: dst, start: src_1->runs[pos1].value,
6894 length: src_1->runs[pos1].length);
6895 pos1++;
6896 } else {
6897 run_container_smart_append_exclusive(src: dst, start: src_2->runs[pos2].value,
6898 length: src_2->runs[pos2].length);
6899 pos2++;
6900 }
6901 }
6902 while (pos1 < src_1->n_runs) {
6903 run_container_smart_append_exclusive(src: dst, start: src_1->runs[pos1].value,
6904 length: src_1->runs[pos1].length);
6905 pos1++;
6906 }
6907
6908 while (pos2 < src_2->n_runs) {
6909 run_container_smart_append_exclusive(src: dst, start: src_2->runs[pos2].value,
6910 length: src_2->runs[pos2].length);
6911 pos2++;
6912 }
6913}
6914
6915/* Compute the intersection of src_1 and src_2 and write the result to
6916 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
6917void run_container_intersection(const run_container_t *src_1,
6918 const run_container_t *src_2,
6919 run_container_t *dst) {
6920 const bool if1 = run_container_is_full(run: src_1);
6921 const bool if2 = run_container_is_full(run: src_2);
6922 if (if1 || if2) {
6923 if (if1) {
6924 run_container_copy(src: src_2, dst);
6925 return;
6926 }
6927 if (if2) {
6928 run_container_copy(src: src_1, dst);
6929 return;
6930 }
6931 }
6932 // TODO: this could be a lot more efficient, could use SIMD optimizations
6933 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6934 if (dst->capacity < neededcapacity)
6935 run_container_grow(run: dst, min: neededcapacity, false);
6936 dst->n_runs = 0;
6937 int32_t rlepos = 0;
6938 int32_t xrlepos = 0;
6939 int32_t start = src_1->runs[rlepos].value;
6940 int32_t end = start + src_1->runs[rlepos].length + 1;
6941 int32_t xstart = src_2->runs[xrlepos].value;
6942 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
6943 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
6944 if (end <= xstart) {
6945 ++rlepos;
6946 if (rlepos < src_1->n_runs) {
6947 start = src_1->runs[rlepos].value;
6948 end = start + src_1->runs[rlepos].length + 1;
6949 }
6950 } else if (xend <= start) {
6951 ++xrlepos;
6952 if (xrlepos < src_2->n_runs) {
6953 xstart = src_2->runs[xrlepos].value;
6954 xend = xstart + src_2->runs[xrlepos].length + 1;
6955 }
6956 } else { // they overlap
6957 const int32_t lateststart = start > xstart ? start : xstart;
6958 int32_t earliestend;
6959 if (end == xend) { // improbable
6960 earliestend = end;
6961 rlepos++;
6962 xrlepos++;
6963 if (rlepos < src_1->n_runs) {
6964 start = src_1->runs[rlepos].value;
6965 end = start + src_1->runs[rlepos].length + 1;
6966 }
6967 if (xrlepos < src_2->n_runs) {
6968 xstart = src_2->runs[xrlepos].value;
6969 xend = xstart + src_2->runs[xrlepos].length + 1;
6970 }
6971 } else if (end < xend) {
6972 earliestend = end;
6973 rlepos++;
6974 if (rlepos < src_1->n_runs) {
6975 start = src_1->runs[rlepos].value;
6976 end = start + src_1->runs[rlepos].length + 1;
6977 }
6978
6979 } else { // end > xend
6980 earliestend = xend;
6981 xrlepos++;
6982 if (xrlepos < src_2->n_runs) {
6983 xstart = src_2->runs[xrlepos].value;
6984 xend = xstart + src_2->runs[xrlepos].length + 1;
6985 }
6986 }
6987 dst->runs[dst->n_runs].value = (uint16_t)lateststart;
6988 dst->runs[dst->n_runs].length =
6989 (uint16_t)(earliestend - lateststart - 1);
6990 dst->n_runs++;
6991 }
6992 }
6993}
6994
6995/* Compute the size of the intersection of src_1 and src_2 . */
6996int run_container_intersection_cardinality(const run_container_t *src_1,
6997 const run_container_t *src_2) {
6998 const bool if1 = run_container_is_full(run: src_1);
6999 const bool if2 = run_container_is_full(run: src_2);
7000 if (if1 || if2) {
7001 if (if1) {
7002 return run_container_cardinality(run: src_2);
7003 }
7004 if (if2) {
7005 return run_container_cardinality(run: src_1);
7006 }
7007 }
7008 int answer = 0;
7009 int32_t rlepos = 0;
7010 int32_t xrlepos = 0;
7011 int32_t start = src_1->runs[rlepos].value;
7012 int32_t end = start + src_1->runs[rlepos].length + 1;
7013 int32_t xstart = src_2->runs[xrlepos].value;
7014 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
7015 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
7016 if (end <= xstart) {
7017 ++rlepos;
7018 if (rlepos < src_1->n_runs) {
7019 start = src_1->runs[rlepos].value;
7020 end = start + src_1->runs[rlepos].length + 1;
7021 }
7022 } else if (xend <= start) {
7023 ++xrlepos;
7024 if (xrlepos < src_2->n_runs) {
7025 xstart = src_2->runs[xrlepos].value;
7026 xend = xstart + src_2->runs[xrlepos].length + 1;
7027 }
7028 } else { // they overlap
7029 const int32_t lateststart = start > xstart ? start : xstart;
7030 int32_t earliestend;
7031 if (end == xend) { // improbable
7032 earliestend = end;
7033 rlepos++;
7034 xrlepos++;
7035 if (rlepos < src_1->n_runs) {
7036 start = src_1->runs[rlepos].value;
7037 end = start + src_1->runs[rlepos].length + 1;
7038 }
7039 if (xrlepos < src_2->n_runs) {
7040 xstart = src_2->runs[xrlepos].value;
7041 xend = xstart + src_2->runs[xrlepos].length + 1;
7042 }
7043 } else if (end < xend) {
7044 earliestend = end;
7045 rlepos++;
7046 if (rlepos < src_1->n_runs) {
7047 start = src_1->runs[rlepos].value;
7048 end = start + src_1->runs[rlepos].length + 1;
7049 }
7050
7051 } else { // end > xend
7052 earliestend = xend;
7053 xrlepos++;
7054 if (xrlepos < src_2->n_runs) {
7055 xstart = src_2->runs[xrlepos].value;
7056 xend = xstart + src_2->runs[xrlepos].length + 1;
7057 }
7058 }
7059 answer += earliestend - lateststart;
7060 }
7061 }
7062 return answer;
7063}
7064
7065bool run_container_intersect(const run_container_t *src_1,
7066 const run_container_t *src_2) {
7067 const bool if1 = run_container_is_full(run: src_1);
7068 const bool if2 = run_container_is_full(run: src_2);
7069 if (if1 || if2) {
7070 if (if1) {
7071 return !run_container_empty(run: src_2);
7072 }
7073 if (if2) {
7074 return !run_container_empty(run: src_1);
7075 }
7076 }
7077 int32_t rlepos = 0;
7078 int32_t xrlepos = 0;
7079 int32_t start = src_1->runs[rlepos].value;
7080 int32_t end = start + src_1->runs[rlepos].length + 1;
7081 int32_t xstart = src_2->runs[xrlepos].value;
7082 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
7083 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
7084 if (end <= xstart) {
7085 ++rlepos;
7086 if (rlepos < src_1->n_runs) {
7087 start = src_1->runs[rlepos].value;
7088 end = start + src_1->runs[rlepos].length + 1;
7089 }
7090 } else if (xend <= start) {
7091 ++xrlepos;
7092 if (xrlepos < src_2->n_runs) {
7093 xstart = src_2->runs[xrlepos].value;
7094 xend = xstart + src_2->runs[xrlepos].length + 1;
7095 }
7096 } else { // they overlap
7097 return true;
7098 }
7099 }
7100 return false;
7101}
7102
7103
7104/* Compute the difference of src_1 and src_2 and write the result to
7105 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
7106void run_container_andnot(const run_container_t *src_1,
7107 const run_container_t *src_2, run_container_t *dst) {
7108 // following Java implementation as of June 2016
7109
7110 if (dst->capacity < src_1->n_runs + src_2->n_runs)
7111 run_container_grow(run: dst, min: src_1->n_runs + src_2->n_runs, false);
7112
7113 dst->n_runs = 0;
7114
7115 int rlepos1 = 0;
7116 int rlepos2 = 0;
7117 int32_t start = src_1->runs[rlepos1].value;
7118 int32_t end = start + src_1->runs[rlepos1].length + 1;
7119 int32_t start2 = src_2->runs[rlepos2].value;
7120 int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
7121
7122 while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
7123 if (end <= start2) {
7124 // output the first run
7125 dst->runs[dst->n_runs++] =
7126 (rle16_t){.value = (uint16_t)start,
7127 .length = (uint16_t)(end - start - 1)};
7128 rlepos1++;
7129 if (rlepos1 < src_1->n_runs) {
7130 start = src_1->runs[rlepos1].value;
7131 end = start + src_1->runs[rlepos1].length + 1;
7132 }
7133 } else if (end2 <= start) {
7134 // exit the second run
7135 rlepos2++;
7136 if (rlepos2 < src_2->n_runs) {
7137 start2 = src_2->runs[rlepos2].value;
7138 end2 = start2 + src_2->runs[rlepos2].length + 1;
7139 }
7140 } else {
7141 if (start < start2) {
7142 dst->runs[dst->n_runs++] =
7143 (rle16_t){.value = (uint16_t)start,
7144 .length = (uint16_t)(start2 - start - 1)};
7145 }
7146 if (end2 < end) {
7147 start = end2;
7148 } else {
7149 rlepos1++;
7150 if (rlepos1 < src_1->n_runs) {
7151 start = src_1->runs[rlepos1].value;
7152 end = start + src_1->runs[rlepos1].length + 1;
7153 }
7154 }
7155 }
7156 }
7157 if (rlepos1 < src_1->n_runs) {
7158 dst->runs[dst->n_runs++] = (rle16_t){
7159 .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)};
7160 rlepos1++;
7161 if (rlepos1 < src_1->n_runs) {
7162 memcpy(dest: dst->runs + dst->n_runs, src: src_1->runs + rlepos1,
7163 n: sizeof(rle16_t) * (src_1->n_runs - rlepos1));
7164 dst->n_runs += src_1->n_runs - rlepos1;
7165 }
7166 }
7167}
7168
7169int run_container_to_uint32_array(void *vout, const run_container_t *cont,
7170 uint32_t base) {
7171 int outpos = 0;
7172 uint32_t *out = (uint32_t *)vout;
7173 for (int i = 0; i < cont->n_runs; ++i) {
7174 uint32_t run_start = base + cont->runs[i].value;
7175 uint16_t le = cont->runs[i].length;
7176 for (int j = 0; j <= le; ++j) {
7177 uint32_t val = run_start + j;
7178 memcpy(dest: out + outpos, src: &val,
7179 n: sizeof(uint32_t)); // should be compiled as a MOV on x64
7180 outpos++;
7181 }
7182 }
7183 return outpos;
7184}
7185
7186/*
7187 * Print this container using printf (useful for debugging).
7188 */
7189void run_container_printf(const run_container_t *cont) {
7190 for (int i = 0; i < cont->n_runs; ++i) {
7191 uint16_t run_start = cont->runs[i].value;
7192 uint16_t le = cont->runs[i].length;
7193 printf(format: "[%d,%d]", run_start, run_start + le);
7194 }
7195}
7196
7197/*
7198 * Print this container using printf as a comma-separated list of 32-bit
7199 * integers starting at base.
7200 */
7201void run_container_printf_as_uint32_array(const run_container_t *cont,
7202 uint32_t base) {
7203 if (cont->n_runs == 0) return;
7204 {
7205 uint32_t run_start = base + cont->runs[0].value;
7206 uint16_t le = cont->runs[0].length;
7207 printf(format: "%u", run_start);
7208 for (uint32_t j = 1; j <= le; ++j) printf(format: ",%u", run_start + j);
7209 }
7210 for (int32_t i = 1; i < cont->n_runs; ++i) {
7211 uint32_t run_start = base + cont->runs[i].value;
7212 uint16_t le = cont->runs[i].length;
7213 for (uint32_t j = 0; j <= le; ++j) printf(format: ",%u", run_start + j);
7214 }
7215}
7216
7217int32_t run_container_serialize(const run_container_t *container, char *buf) {
7218 int32_t l, off;
7219
7220 memcpy(dest: buf, src: &container->n_runs, n: off = sizeof(container->n_runs));
7221 memcpy(dest: &buf[off], src: &container->capacity, n: sizeof(container->capacity));
7222 off += sizeof(container->capacity);
7223
7224 l = sizeof(rle16_t) * container->n_runs;
7225 memcpy(dest: &buf[off], src: container->runs, n: l);
7226 return (off + l);
7227}
7228
7229int32_t run_container_write(const run_container_t *container, char *buf) {
7230 memcpy(dest: buf, src: &container->n_runs, n: sizeof(uint16_t));
7231 memcpy(dest: buf + sizeof(uint16_t), src: container->runs,
7232 n: container->n_runs * sizeof(rle16_t));
7233 return run_container_size_in_bytes(container);
7234}
7235
7236int32_t run_container_read(int32_t cardinality, run_container_t *container,
7237 const char *buf) {
7238 (void)cardinality;
7239 memcpy(dest: &container->n_runs, src: buf, n: sizeof(uint16_t));
7240 if (container->n_runs > container->capacity)
7241 run_container_grow(run: container, min: container->n_runs, false);
7242 if(container->n_runs > 0) {
7243 memcpy(dest: container->runs, src: buf + sizeof(uint16_t),
7244 n: container->n_runs * sizeof(rle16_t));
7245 }
7246 return run_container_size_in_bytes(container);
7247}
7248
7249uint32_t run_container_serialization_len(const run_container_t *container) {
7250 return (sizeof(container->n_runs) + sizeof(container->capacity) +
7251 sizeof(rle16_t) * container->n_runs);
7252}
7253
7254void *run_container_deserialize(const char *buf, size_t buf_len) {
7255 run_container_t *ptr;
7256
7257 if (buf_len < 8 /* n_runs + capacity */)
7258 return (NULL);
7259 else
7260 buf_len -= 8;
7261
7262 if ((ptr = (run_container_t *)malloc(size: sizeof(run_container_t))) != NULL) {
7263 size_t len;
7264 int32_t off;
7265
7266 memcpy(dest: &ptr->n_runs, src: buf, n: off = 4);
7267 memcpy(dest: &ptr->capacity, src: &buf[off], n: 4);
7268 off += 4;
7269
7270 len = sizeof(rle16_t) * ptr->n_runs;
7271
7272 if (len != buf_len) {
7273 free(ptr: ptr);
7274 return (NULL);
7275 }
7276
7277 if ((ptr->runs = (rle16_t *)malloc(size: len)) == NULL) {
7278 free(ptr: ptr);
7279 return (NULL);
7280 }
7281
7282 memcpy(dest: ptr->runs, src: &buf[off], n: len);
7283
7284 /* Check if returned values are monotonically increasing */
7285 for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) {
7286 if (ptr->runs[i].value < j) {
7287 free(ptr: ptr->runs);
7288 free(ptr: ptr);
7289 return (NULL);
7290 } else
7291 j = ptr->runs[i].value;
7292 }
7293 }
7294
7295 return (ptr);
7296}
7297
7298bool run_container_iterate(const run_container_t *cont, uint32_t base,
7299 roaring_iterator iterator, void *ptr) {
7300 for (int i = 0; i < cont->n_runs; ++i) {
7301 uint32_t run_start = base + cont->runs[i].value;
7302 uint16_t le = cont->runs[i].length;
7303
7304 for (int j = 0; j <= le; ++j)
7305 if (!iterator(run_start + j, ptr)) return false;
7306 }
7307 return true;
7308}
7309
7310bool run_container_iterate64(const run_container_t *cont, uint32_t base,
7311 roaring_iterator64 iterator, uint64_t high_bits,
7312 void *ptr) {
7313 for (int i = 0; i < cont->n_runs; ++i) {
7314 uint32_t run_start = base + cont->runs[i].value;
7315 uint16_t le = cont->runs[i].length;
7316
7317 for (int j = 0; j <= le; ++j)
7318 if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
7319 return false;
7320 }
7321 return true;
7322}
7323
7324bool run_container_is_subset(const run_container_t *container1,
7325 const run_container_t *container2) {
7326 int i1 = 0, i2 = 0;
7327 while (i1 < container1->n_runs && i2 < container2->n_runs) {
7328 int start1 = container1->runs[i1].value;
7329 int stop1 = start1 + container1->runs[i1].length;
7330 int start2 = container2->runs[i2].value;
7331 int stop2 = start2 + container2->runs[i2].length;
7332 if (start1 < start2) {
7333 return false;
7334 } else { // start1 >= start2
7335 if (stop1 < stop2) {
7336 i1++;
7337 } else if (stop1 == stop2) {
7338 i1++;
7339 i2++;
7340 } else { // stop1 > stop2
7341 i2++;
7342 }
7343 }
7344 }
7345 if (i1 == container1->n_runs) {
7346 return true;
7347 } else {
7348 return false;
7349 }
7350}
7351
7352// TODO: write smart_append_exclusive version to match the overloaded 1 param
7353// Java version (or is it even used?)
7354
7355// follows the Java implementation closely
7356// length is the rle-value. Ie, run [10,12) uses a length value 1.
7357void run_container_smart_append_exclusive(run_container_t *src,
7358 const uint16_t start,
7359 const uint16_t length) {
7360 int old_end;
7361 rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
7362 rle16_t *appended_last_run = src->runs + src->n_runs;
7363
7364 if (!src->n_runs ||
7365 (start > (old_end = last_run->value + last_run->length + 1))) {
7366 *appended_last_run = (rle16_t){.value = start, .length = length};
7367 src->n_runs++;
7368 return;
7369 }
7370 if (old_end == start) {
7371 // we merge
7372 last_run->length += (length + 1);
7373 return;
7374 }
7375 int new_end = start + length + 1;
7376
7377 if (start == last_run->value) {
7378 // wipe out previous
7379 if (new_end < old_end) {
7380 *last_run = (rle16_t){.value = (uint16_t)new_end,
7381 .length = (uint16_t)(old_end - new_end - 1)};
7382 return;
7383 } else if (new_end > old_end) {
7384 *last_run = (rle16_t){.value = (uint16_t)old_end,
7385 .length = (uint16_t)(new_end - old_end - 1)};
7386 return;
7387 } else {
7388 src->n_runs--;
7389 return;
7390 }
7391 }
7392 last_run->length = start - last_run->value - 1;
7393 if (new_end < old_end) {
7394 *appended_last_run =
7395 (rle16_t){.value = (uint16_t)new_end,
7396 .length = (uint16_t)(old_end - new_end - 1)};
7397 src->n_runs++;
7398 } else if (new_end > old_end) {
7399 *appended_last_run =
7400 (rle16_t){.value = (uint16_t)old_end,
7401 .length = (uint16_t)(new_end - old_end - 1)};
7402 src->n_runs++;
7403 }
7404}
7405
7406bool run_container_select(const run_container_t *container,
7407 uint32_t *start_rank, uint32_t rank,
7408 uint32_t *element) {
7409 for (int i = 0; i < container->n_runs; i++) {
7410 uint16_t length = container->runs[i].length;
7411 if (rank <= *start_rank + length) {
7412 uint16_t value = container->runs[i].value;
7413 *element = value + rank - (*start_rank);
7414 return true;
7415 } else
7416 *start_rank += length + 1;
7417 }
7418 return false;
7419}
7420
7421int run_container_rank(const run_container_t *container, uint16_t x) {
7422 int sum = 0;
7423 uint32_t x32 = x;
7424 for (int i = 0; i < container->n_runs; i++) {
7425 uint32_t startpoint = container->runs[i].value;
7426 uint32_t length = container->runs[i].length;
7427 uint32_t endpoint = length + startpoint;
7428 if (x <= endpoint) {
7429 if (x < startpoint) break;
7430 return sum + (x32 - startpoint) + 1;
7431 } else {
7432 sum += length + 1;
7433 }
7434 }
7435 return sum;
7436}
7437/* end file src/containers/run.c */
7438/* begin file src/roaring.c */
7439#include <assert.h>
7440#include <stdarg.h>
7441#include <stdint.h>
7442#include <stdio.h>
7443#include <string.h>
7444#include <inttypes.h>
7445
7446static inline bool is_cow(const roaring_bitmap_t *r) {
7447 return r->high_low_container.flags & ROARING_FLAG_COW;
7448}
7449static inline bool is_frozen(const roaring_bitmap_t *r) {
7450 return r->high_low_container.flags & ROARING_FLAG_FROZEN;
7451}
7452
7453// this is like roaring_bitmap_add, but it populates pointer arguments in such a
7454// way
7455// that we can recover the container touched, which, in turn can be used to
7456// accelerate some functions (when you repeatedly need to add to the same
7457// container)
7458static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r,
7459 uint32_t val,
7460 uint8_t *typecode,
7461 int *index) {
7462 uint16_t hb = val >> 16;
7463 const int i = ra_get_index(ra: &r->high_low_container, x: hb);
7464 if (i >= 0) {
7465 ra_unshare_container_at_index(ra: &r->high_low_container, i);
7466 void *container =
7467 ra_get_container_at_index(ra: &r->high_low_container, i, typecode);
7468 uint8_t newtypecode = *typecode;
7469 void *container2 =
7470 container_add(container, val: val & 0xFFFF, typecode: *typecode, new_typecode: &newtypecode);
7471 *index = i;
7472 if (container2 != container) {
7473 container_free(container, typecode: *typecode);
7474 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7475 typecode: newtypecode);
7476 *typecode = newtypecode;
7477 return container2;
7478 } else {
7479 return container;
7480 }
7481 } else {
7482 array_container_t *newac = array_container_create();
7483 void *container = container_add(container: newac, val: val & 0xFFFF,
7484 ARRAY_CONTAINER_TYPE_CODE, new_typecode: typecode);
7485 // we could just assume that it stays an array container
7486 ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb,
7487 container, typecode: *typecode);
7488 *index = -i - 1;
7489 return container;
7490 }
7491}
7492
7493roaring_bitmap_t *roaring_bitmap_create(void) {
7494 roaring_bitmap_t *ans =
7495 (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t));
7496 if (!ans) {
7497 return NULL;
7498 }
7499 ra_init(t: &ans->high_low_container);
7500 return ans;
7501}
7502
7503roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
7504 roaring_bitmap_t *ans =
7505 (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t));
7506 if (!ans) {
7507 return NULL;
7508 }
7509 bool is_ok = ra_init_with_capacity(new_ra: &ans->high_low_container, cap);
7510 if (!is_ok) {
7511 free(ptr: ans);
7512 return NULL;
7513 }
7514 return ans;
7515}
7516
7517void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
7518 const uint32_t *vals) {
7519 void *container = NULL; // hold value of last container touched
7520 uint8_t typecode = 0; // typecode of last container touched
7521 uint32_t prev = 0; // previous valued inserted
7522 size_t i = 0; // index of value
7523 int containerindex = 0;
7524 if (n_args == 0) return;
7525 uint32_t val;
7526 memcpy(dest: &val, src: vals + i, n: sizeof(val));
7527 container =
7528 containerptr_roaring_bitmap_add(r, val, typecode: &typecode, index: &containerindex);
7529 prev = val;
7530 i++;
7531 for (; i < n_args; i++) {
7532 memcpy(dest: &val, src: vals + i, n: sizeof(val));
7533 if (((prev ^ val) >> 16) ==
7534 0) { // no need to seek the container, it is at hand
7535 // because we already have the container at hand, we can do the
7536 // insertion
7537 // automatically, bypassing the roaring_bitmap_add call
7538 uint8_t newtypecode = typecode;
7539 void *container2 =
7540 container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode);
7541 if (container2 != container) { // rare instance when we need to
7542 // change the container type
7543 container_free(container, typecode);
7544 ra_set_container_at_index(ra: &r->high_low_container,
7545 i: containerindex, c: container2,
7546 typecode: newtypecode);
7547 typecode = newtypecode;
7548 container = container2;
7549 }
7550 } else {
7551 container = containerptr_roaring_bitmap_add(r, val, typecode: &typecode,
7552 index: &containerindex);
7553 }
7554 prev = val;
7555 }
7556}
7557
7558roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
7559 roaring_bitmap_t *answer = roaring_bitmap_create();
7560 roaring_bitmap_add_many(r: answer, n_args, vals);
7561 return answer;
7562}
7563
7564roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
7565 // todo: could be greatly optimized but we do not expect this call to ever
7566 // include long lists
7567 roaring_bitmap_t *answer = roaring_bitmap_create();
7568 va_list ap;
7569 va_start(ap, n_args);
7570 for (size_t i = 1; i <= n_args; i++) {
7571 uint32_t val = va_arg(ap, uint32_t);
7572 roaring_bitmap_add(r: answer, x: val);
7573 }
7574 va_end(ap);
7575 return answer;
7576}
7577
/* Returns the smaller of the two 32-bit unsigned values. */
static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7581
/* Returns the smaller of the two 64-bit unsigned values. */
static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7585
7586roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
7587 uint32_t step) {
7588 if(max >= UINT64_C(0x100000000)) {
7589 max = UINT64_C(0x100000000);
7590 }
7591 if (step == 0) return NULL;
7592 if (max <= min) return NULL;
7593 roaring_bitmap_t *answer = roaring_bitmap_create();
7594 if (step >= (1 << 16)) {
7595 for (uint32_t value = (uint32_t)min; value < max; value += step) {
7596 roaring_bitmap_add(r: answer, x: value);
7597 }
7598 return answer;
7599 }
7600 uint64_t min_tmp = min;
7601 do {
7602 uint32_t key = (uint32_t)min_tmp >> 16;
7603 uint32_t container_min = min_tmp & 0xFFFF;
7604 uint32_t container_max = (uint32_t)minimum_uint64(a: max - (key << 16), b: 1 << 16);
7605 uint8_t type;
7606 void *container = container_from_range(type: &type, min: container_min,
7607 max: container_max, step: (uint16_t)step);
7608 ra_append(ra: &answer->high_low_container, s: key, c: container, typecode: type);
7609 uint32_t gap = container_max - container_min + step - 1;
7610 min_tmp += gap - (gap % step);
7611 } while (min_tmp < max);
7612 // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
7613 return answer;
7614}
7615
7616void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
7617 if (min > max) {
7618 return;
7619 }
7620
7621 uint32_t min_key = min >> 16;
7622 uint32_t max_key = max >> 16;
7623
7624 int32_t num_required_containers = max_key - min_key + 1;
7625 int32_t suffix_length = count_greater(array: ra->high_low_container.keys,
7626 lenarray: ra->high_low_container.size,
7627 ikey: max_key);
7628 int32_t prefix_length = count_less(array: ra->high_low_container.keys,
7629 lenarray: ra->high_low_container.size - suffix_length,
7630 ikey: min_key);
7631 int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length;
7632
7633 if (num_required_containers > common_length) {
7634 ra_shift_tail(ra: &ra->high_low_container, count: suffix_length,
7635 distance: num_required_containers - common_length);
7636 }
7637
7638 int32_t src = prefix_length + common_length - 1;
7639 int32_t dst = ra->high_low_container.size - suffix_length - 1;
7640 for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
7641 uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
7642 uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
7643 void* new_container;
7644 uint8_t new_type;
7645
7646 if (src >= 0 && ra->high_low_container.keys[src] == key) {
7647 ra_unshare_container_at_index(ra: &ra->high_low_container, i: src);
7648 new_container = container_add_range(container: ra->high_low_container.containers[src],
7649 type: ra->high_low_container.typecodes[src],
7650 min: container_min, max: container_max, result_type: &new_type);
7651 if (new_container != ra->high_low_container.containers[src]) {
7652 container_free(container: ra->high_low_container.containers[src],
7653 typecode: ra->high_low_container.typecodes[src]);
7654 }
7655 src--;
7656 } else {
7657 new_container = container_from_range(type: &new_type, min: container_min,
7658 max: container_max+1, step: 1);
7659 }
7660 ra_replace_key_and_container_at_index(ra: &ra->high_low_container, i: dst,
7661 key, c: new_container, typecode: new_type);
7662 dst--;
7663 }
7664}
7665
7666void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
7667 if (min > max) {
7668 return;
7669 }
7670
7671 uint32_t min_key = min >> 16;
7672 uint32_t max_key = max >> 16;
7673
7674 int32_t src = count_less(array: ra->high_low_container.keys, lenarray: ra->high_low_container.size, ikey: min_key);
7675 int32_t dst = src;
7676 while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) {
7677 uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0;
7678 uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff;
7679 ra_unshare_container_at_index(ra: &ra->high_low_container, i: src);
7680 void *new_container;
7681 uint8_t new_type;
7682 new_container = container_remove_range(container: ra->high_low_container.containers[src],
7683 type: ra->high_low_container.typecodes[src],
7684 min: container_min, max: container_max,
7685 result_type: &new_type);
7686 if (new_container != ra->high_low_container.containers[src]) {
7687 container_free(container: ra->high_low_container.containers[src],
7688 typecode: ra->high_low_container.typecodes[src]);
7689 }
7690 if (new_container) {
7691 ra_replace_key_and_container_at_index(ra: &ra->high_low_container, i: dst,
7692 key: ra->high_low_container.keys[src],
7693 c: new_container, typecode: new_type);
7694 dst++;
7695 }
7696 src++;
7697 }
7698 if (src > dst) {
7699 ra_shift_tail(ra: &ra->high_low_container, count: ra->high_low_container.size - src, distance: dst - src);
7700 }
7701}
7702
7703void roaring_bitmap_printf(const roaring_bitmap_t *ra) {
7704 printf(format: "{");
7705 for (int i = 0; i < ra->high_low_container.size; ++i) {
7706 container_printf_as_uint32_array(
7707 container: ra->high_low_container.containers[i],
7708 typecode: ra->high_low_container.typecodes[i],
7709 base: ((uint32_t)ra->high_low_container.keys[i]) << 16);
7710 if (i + 1 < ra->high_low_container.size) printf(format: ",");
7711 }
7712 printf(format: "}");
7713}
7714
7715void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) {
7716 printf(format: "{");
7717 for (int i = 0; i < ra->high_low_container.size; ++i) {
7718 printf(format: "%d: %s (%d)", ra->high_low_container.keys[i],
7719 get_full_container_name(container: ra->high_low_container.containers[i],
7720 typecode: ra->high_low_container.typecodes[i]),
7721 container_get_cardinality(container: ra->high_low_container.containers[i],
7722 typecode: ra->high_low_container.typecodes[i]));
7723 if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) {
7724 printf(
7725 format: "(shared count = %" PRIu32 " )",
7726 ((shared_container_t *)(ra->high_low_container.containers[i]))
7727 ->counter);
7728 }
7729
7730 if (i + 1 < ra->high_low_container.size) printf(format: ", ");
7731 }
7732 printf(format: "}");
7733}
7734
/* Accumulator for a minimum, a maximum and a sum of 32-bit values. */
typedef struct min_max_sum_s min_max_sum_t;

struct min_max_sum_s {
    uint32_t min;  // smallest value recorded
    uint32_t max;  // largest value recorded
    uint64_t sum;  // 64-bit total of the recorded values
};
7740
7741static bool min_max_sum_fnc(uint32_t value, void *param) {
7742 min_max_sum_t *mms = (min_max_sum_t *)param;
7743 if (value > mms->max) mms->max = value;
7744 if (value < mms->min) mms->min = value;
7745 mms->sum += value;
7746 return true; // we always process all data points
7747}
7748
7749/**
7750* (For advanced users.)
7751* Collect statistics about the bitmap
7752*/
7753void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
7754 roaring_statistics_t *stat) {
7755 memset(s: stat, c: 0, n: sizeof(*stat));
7756 stat->n_containers = ra->high_low_container.size;
7757 stat->cardinality = roaring_bitmap_get_cardinality(ra);
7758 min_max_sum_t mms;
7759 mms.min = UINT32_C(0xFFFFFFFF);
7760 mms.max = UINT32_C(0);
7761 mms.sum = 0;
7762 roaring_iterate(ra, iterator: &min_max_sum_fnc, ptr: &mms);
7763 stat->min_value = mms.min;
7764 stat->max_value = mms.max;
7765 stat->sum_value = mms.sum;
7766
7767 for (int i = 0; i < ra->high_low_container.size; ++i) {
7768 uint8_t truetype =
7769 get_container_type(container: ra->high_low_container.containers[i],
7770 type: ra->high_low_container.typecodes[i]);
7771 uint32_t card =
7772 container_get_cardinality(container: ra->high_low_container.containers[i],
7773 typecode: ra->high_low_container.typecodes[i]);
7774 uint32_t sbytes =
7775 container_size_in_bytes(container: ra->high_low_container.containers[i],
7776 typecode: ra->high_low_container.typecodes[i]);
7777 switch (truetype) {
7778 case BITSET_CONTAINER_TYPE_CODE:
7779 stat->n_bitset_containers++;
7780 stat->n_values_bitset_containers += card;
7781 stat->n_bytes_bitset_containers += sbytes;
7782 break;
7783 case ARRAY_CONTAINER_TYPE_CODE:
7784 stat->n_array_containers++;
7785 stat->n_values_array_containers += card;
7786 stat->n_bytes_array_containers += sbytes;
7787 break;
7788 case RUN_CONTAINER_TYPE_CODE:
7789 stat->n_run_containers++;
7790 stat->n_values_run_containers += card;
7791 stat->n_bytes_run_containers += sbytes;
7792 break;
7793 default:
7794 assert(false);
7795 __builtin_unreachable();
7796 }
7797 }
7798}
7799
7800roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
7801 roaring_bitmap_t *ans =
7802 (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t));
7803 if (!ans) {
7804 return NULL;
7805 }
7806 bool is_ok = ra_copy(source: &r->high_low_container, dest: &ans->high_low_container,
7807 copy_on_write: is_cow(r));
7808 if (!is_ok) {
7809 free(ptr: ans);
7810 return NULL;
7811 }
7812 roaring_bitmap_set_copy_on_write(r: ans, cow: is_cow(r));
7813 return ans;
7814}
7815
7816bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
7817 const roaring_bitmap_t *src) {
7818 return ra_overwrite(source: &src->high_low_container, dest: &dest->high_low_container,
7819 copy_on_write: is_cow(r: src));
7820}
7821
7822void roaring_bitmap_free(const roaring_bitmap_t *r) {
7823 if (!is_frozen(r)) {
7824 ra_clear(r: (roaring_array_t*)&r->high_low_container);
7825 }
7826 free(ptr: (roaring_bitmap_t*)r);
7827}
7828
7829void roaring_bitmap_clear(roaring_bitmap_t *r) {
7830 ra_reset(ra: &r->high_low_container);
7831}
7832
7833void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
7834 const uint16_t hb = val >> 16;
7835 const int i = ra_get_index(ra: &r->high_low_container, x: hb);
7836 uint8_t typecode;
7837 if (i >= 0) {
7838 ra_unshare_container_at_index(ra: &r->high_low_container, i);
7839 void *container =
7840 ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode);
7841 uint8_t newtypecode = typecode;
7842 void *container2 =
7843 container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode);
7844 if (container2 != container) {
7845 container_free(container, typecode);
7846 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7847 typecode: newtypecode);
7848 }
7849 } else {
7850 array_container_t *newac = array_container_create();
7851 void *container = container_add(container: newac, val: val & 0xFFFF,
7852 ARRAY_CONTAINER_TYPE_CODE, new_typecode: &typecode);
7853 // we could just assume that it stays an array container
7854 ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb,
7855 container, typecode);
7856 }
7857}
7858
7859bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
7860 const uint16_t hb = val >> 16;
7861 const int i = ra_get_index(ra: &r->high_low_container, x: hb);
7862 uint8_t typecode;
7863 bool result = false;
7864 if (i >= 0) {
7865 ra_unshare_container_at_index(ra: &r->high_low_container, i);
7866 void *container =
7867 ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode);
7868
7869 const int oldCardinality =
7870 container_get_cardinality(container, typecode);
7871
7872 uint8_t newtypecode = typecode;
7873 void *container2 =
7874 container_add(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode);
7875 if (container2 != container) {
7876 container_free(container, typecode);
7877 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7878 typecode: newtypecode);
7879 result = true;
7880 } else {
7881 const int newCardinality =
7882 container_get_cardinality(container, typecode: newtypecode);
7883
7884 result = oldCardinality != newCardinality;
7885 }
7886 } else {
7887 array_container_t *newac = array_container_create();
7888 void *container = container_add(container: newac, val: val & 0xFFFF,
7889 ARRAY_CONTAINER_TYPE_CODE, new_typecode: &typecode);
7890 // we could just assume that it stays an array container
7891 ra_insert_new_key_value_at(ra: &r->high_low_container, i: -i - 1, key: hb,
7892 container, typecode);
7893 result = true;
7894 }
7895
7896 return result;
7897}
7898
7899void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
7900 const uint16_t hb = val >> 16;
7901 const int i = ra_get_index(ra: &r->high_low_container, x: hb);
7902 uint8_t typecode;
7903 if (i >= 0) {
7904 ra_unshare_container_at_index(ra: &r->high_low_container, i);
7905 void *container =
7906 ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode);
7907 uint8_t newtypecode = typecode;
7908 void *container2 =
7909 container_remove(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode);
7910 if (container2 != container) {
7911 container_free(container, typecode);
7912 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7913 typecode: newtypecode);
7914 }
7915 if (container_get_cardinality(container: container2, typecode: newtypecode) != 0) {
7916 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7917 typecode: newtypecode);
7918 } else {
7919 ra_remove_at_index_and_free(ra: &r->high_low_container, i);
7920 }
7921 }
7922}
7923
7924bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
7925 const uint16_t hb = val >> 16;
7926 const int i = ra_get_index(ra: &r->high_low_container, x: hb);
7927 uint8_t typecode;
7928 bool result = false;
7929 if (i >= 0) {
7930 ra_unshare_container_at_index(ra: &r->high_low_container, i);
7931 void *container =
7932 ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode);
7933
7934 const int oldCardinality =
7935 container_get_cardinality(container, typecode);
7936
7937 uint8_t newtypecode = typecode;
7938 void *container2 =
7939 container_remove(container, val: val & 0xFFFF, typecode, new_typecode: &newtypecode);
7940 if (container2 != container) {
7941 container_free(container, typecode);
7942 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7943 typecode: newtypecode);
7944 }
7945
7946 const int newCardinality =
7947 container_get_cardinality(container: container2, typecode: newtypecode);
7948
7949 if (newCardinality != 0) {
7950 ra_set_container_at_index(ra: &r->high_low_container, i, c: container2,
7951 typecode: newtypecode);
7952 } else {
7953 ra_remove_at_index_and_free(ra: &r->high_low_container, i);
7954 }
7955
7956 result = oldCardinality != newCardinality;
7957 }
7958 return result;
7959}
7960
7961void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
7962 const uint32_t *vals) {
7963 if (n_args == 0 || r->high_low_container.size == 0) {
7964 return;
7965 }
7966 int32_t pos = -1; // position of the container used in the previous iteration
7967 for (size_t i = 0; i < n_args; i++) {
7968 uint16_t key = (uint16_t)(vals[i] >> 16);
7969 if (pos < 0 || key != r->high_low_container.keys[pos]) {
7970 pos = ra_get_index(ra: &r->high_low_container, x: key);
7971 }
7972 if (pos >= 0) {
7973 uint8_t new_typecode;
7974 void *new_container;
7975 new_container = container_remove(container: r->high_low_container.containers[pos],
7976 val: vals[i] & 0xffff,
7977 typecode: r->high_low_container.typecodes[pos],
7978 new_typecode: &new_typecode);
7979 if (new_container != r->high_low_container.containers[pos]) {
7980 container_free(container: r->high_low_container.containers[pos],
7981 typecode: r->high_low_container.typecodes[pos]);
7982 ra_replace_key_and_container_at_index(ra: &r->high_low_container,
7983 i: pos, key, c: new_container,
7984 typecode: new_typecode);
7985 }
7986 if (!container_nonzero_cardinality(container: new_container, typecode: new_typecode)) {
7987 container_free(container: new_container, typecode: new_typecode);
7988 ra_remove_at_index(ra: &r->high_low_container, i: pos);
7989 pos = -1;
7990 }
7991 }
7992 }
7993}
7994
7995// there should be some SIMD optimizations possible here
7996roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
7997 const roaring_bitmap_t *x2) {
7998 uint8_t container_result_type = 0;
7999 const int length1 = x1->high_low_container.size,
8000 length2 = x2->high_low_container.size;
8001 uint32_t neededcap = length1 > length2 ? length2 : length1;
8002 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: neededcap);
8003 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
8004
8005 int pos1 = 0, pos2 = 0;
8006
8007 while (pos1 < length1 && pos2 < length2) {
8008 const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8009 const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8010
8011 if (s1 == s2) {
8012 uint8_t container_type_1, container_type_2;
8013 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8014 typecode: &container_type_1);
8015 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8016 typecode: &container_type_2);
8017 void *c = container_and(c1, type1: container_type_1, c2, type2: container_type_2,
8018 result_type: &container_result_type);
8019 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
8020 ra_append(ra: &answer->high_low_container, s: s1, c,
8021 typecode: container_result_type);
8022 } else {
8023 container_free(
8024 container: c, typecode: container_result_type); // otherwise:memory leak!
8025 }
8026 ++pos1;
8027 ++pos2;
8028 } else if (s1 < s2) { // s1 < s2
8029 pos1 = ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1);
8030 } else { // s1 > s2
8031 pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2);
8032 }
8033 }
8034 return answer;
8035}
8036
8037/**
8038 * Compute the union of 'number' bitmaps.
8039 */
8040roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
8041 const roaring_bitmap_t **x) {
8042 if (number == 0) {
8043 return roaring_bitmap_create();
8044 }
8045 if (number == 1) {
8046 return roaring_bitmap_copy(r: x[0]);
8047 }
8048 roaring_bitmap_t *answer =
8049 roaring_bitmap_lazy_or(x1: x[0], x2: x[1], LAZY_OR_BITSET_CONVERSION);
8050 for (size_t i = 2; i < number; i++) {
8051 roaring_bitmap_lazy_or_inplace(x1: answer, x2: x[i], LAZY_OR_BITSET_CONVERSION);
8052 }
8053 roaring_bitmap_repair_after_lazy(x1: answer);
8054 return answer;
8055}
8056
8057/**
8058 * Compute the xor of 'number' bitmaps.
8059 */
8060roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
8061 const roaring_bitmap_t **x) {
8062 if (number == 0) {
8063 return roaring_bitmap_create();
8064 }
8065 if (number == 1) {
8066 return roaring_bitmap_copy(r: x[0]);
8067 }
8068 roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x1: x[0], x2: x[1]);
8069 for (size_t i = 2; i < number; i++) {
8070 roaring_bitmap_lazy_xor_inplace(x1: answer, x2: x[i]);
8071 }
8072 roaring_bitmap_repair_after_lazy(x1: answer);
8073 return answer;
8074}
8075
8076// inplace and (modifies its first argument).
8077void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
8078 const roaring_bitmap_t *x2) {
8079 if (x1 == x2) return;
8080 int pos1 = 0, pos2 = 0, intersection_size = 0;
8081 const int length1 = ra_get_size(ra: &x1->high_low_container);
8082 const int length2 = ra_get_size(ra: &x2->high_low_container);
8083
8084 // any skipped-over or newly emptied containers in x1
8085 // have to be freed.
8086 while (pos1 < length1 && pos2 < length2) {
8087 const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8088 const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8089
8090 if (s1 == s2) {
8091 uint8_t typecode1, typecode2, typecode_result;
8092 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8093 typecode: &typecode1);
8094 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &typecode1);
8095 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8096 typecode: &typecode2);
8097 void *c =
8098 container_iand(c1, type1: typecode1, c2, type2: typecode2, result_type: &typecode_result);
8099 if (c != c1) { // in this instance a new container was created, and
8100 // we need to free the old one
8101 container_free(container: c1, typecode: typecode1);
8102 }
8103 if (container_nonzero_cardinality(container: c, typecode: typecode_result)) {
8104 ra_replace_key_and_container_at_index(ra: &x1->high_low_container,
8105 i: intersection_size, key: s1, c,
8106 typecode: typecode_result);
8107 intersection_size++;
8108 } else {
8109 container_free(container: c, typecode: typecode_result);
8110 }
8111 ++pos1;
8112 ++pos2;
8113 } else if (s1 < s2) {
8114 pos1 = ra_advance_until_freeing(ra: &x1->high_low_container, x: s2, pos: pos1);
8115 } else { // s1 > s2
8116 pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2);
8117 }
8118 }
8119
8120 // if we ended early because x2 ran out, then all remaining in x1 should be
8121 // freed
8122 while (pos1 < length1) {
8123 container_free(container: x1->high_low_container.containers[pos1],
8124 typecode: x1->high_low_container.typecodes[pos1]);
8125 ++pos1;
8126 }
8127
8128 // all containers after this have either been copied or freed
8129 ra_downsize(ra: &x1->high_low_container, new_length: intersection_size);
8130}
8131
8132roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
8133 const roaring_bitmap_t *x2) {
8134 uint8_t container_result_type = 0;
8135 const int length1 = x1->high_low_container.size,
8136 length2 = x2->high_low_container.size;
8137 if (0 == length1) {
8138 return roaring_bitmap_copy(r: x2);
8139 }
8140 if (0 == length2) {
8141 return roaring_bitmap_copy(r: x1);
8142 }
8143 roaring_bitmap_t *answer =
8144 roaring_bitmap_create_with_capacity(cap: length1 + length2);
8145 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
8146 int pos1 = 0, pos2 = 0;
8147 uint8_t container_type_1, container_type_2;
8148 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8149 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8150 while (true) {
8151 if (s1 == s2) {
8152 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8153 typecode: &container_type_1);
8154 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8155 typecode: &container_type_2);
8156 void *c = container_or(c1, type1: container_type_1, c2, type2: container_type_2,
8157 result_type: &container_result_type);
8158 // since we assume that the initial containers are non-empty, the
8159 // result here
8160 // can only be non-empty
8161 ra_append(ra: &answer->high_low_container, s: s1, c,
8162 typecode: container_result_type);
8163 ++pos1;
8164 ++pos2;
8165 if (pos1 == length1) break;
8166 if (pos2 == length2) break;
8167 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8168 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8169
8170 } else if (s1 < s2) { // s1 < s2
8171 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8172 typecode: &container_type_1);
8173 // c1 = container_clone(c1, container_type_1);
8174 c1 =
8175 get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1));
8176 if (is_cow(r: x1)) {
8177 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1,
8178 typecode: container_type_1);
8179 }
8180 ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1);
8181 pos1++;
8182 if (pos1 == length1) break;
8183 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8184
8185 } else { // s1 > s2
8186 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8187 typecode: &container_type_2);
8188 // c2 = container_clone(c2, container_type_2);
8189 c2 =
8190 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
8191 if (is_cow(r: x2)) {
8192 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
8193 typecode: container_type_2);
8194 }
8195 ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2);
8196 pos2++;
8197 if (pos2 == length2) break;
8198 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8199 }
8200 }
8201 if (pos1 == length1) {
8202 ra_append_copy_range(ra: &answer->high_low_container,
8203 sa: &x2->high_low_container, start_index: pos2, end_index: length2,
8204 copy_on_write: is_cow(r: x2));
8205 } else if (pos2 == length2) {
8206 ra_append_copy_range(ra: &answer->high_low_container,
8207 sa: &x1->high_low_container, start_index: pos1, end_index: length1,
8208 copy_on_write: is_cow(r: x1));
8209 }
8210 return answer;
8211}
8212
8213// inplace or (modifies its first argument).
8214void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
8215 const roaring_bitmap_t *x2) {
8216 uint8_t container_result_type = 0;
8217 int length1 = x1->high_low_container.size;
8218 const int length2 = x2->high_low_container.size;
8219
8220 if (0 == length2) return;
8221
8222 if (0 == length1) {
8223 roaring_bitmap_overwrite(dest: x1, src: x2);
8224 return;
8225 }
8226 int pos1 = 0, pos2 = 0;
8227 uint8_t container_type_1, container_type_2;
8228 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8229 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8230 while (true) {
8231 if (s1 == s2) {
8232 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8233 typecode: &container_type_1);
8234 if (!container_is_full(container: c1, typecode: container_type_1)) {
8235 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1);
8236
8237 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container,
8238 i: pos2, typecode: &container_type_2);
8239 void *c =
8240 container_ior(c1, type1: container_type_1, c2, type2: container_type_2,
8241 result_type: &container_result_type);
8242 if (c !=
8243 c1) { // in this instance a new container was created, and
8244 // we need to free the old one
8245 container_free(container: c1, typecode: container_type_1);
8246 }
8247
8248 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c,
8249 typecode: container_result_type);
8250 }
8251 ++pos1;
8252 ++pos2;
8253 if (pos1 == length1) break;
8254 if (pos2 == length2) break;
8255 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8256 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8257
8258 } else if (s1 < s2) { // s1 < s2
8259 pos1++;
8260 if (pos1 == length1) break;
8261 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8262
8263 } else { // s1 > s2
8264 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8265 typecode: &container_type_2);
8266 c2 =
8267 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
8268 if (is_cow(r: x2)) {
8269 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
8270 typecode: container_type_2);
8271 }
8272
8273 // void *c2_clone = container_clone(c2, container_type_2);
8274 ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2,
8275 typecode: container_type_2);
8276 pos1++;
8277 length1++;
8278 pos2++;
8279 if (pos2 == length2) break;
8280 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8281 }
8282 }
8283 if (pos1 == length1) {
8284 ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container,
8285 start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2));
8286 }
8287}
8288
8289roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
8290 const roaring_bitmap_t *x2) {
8291 uint8_t container_result_type = 0;
8292 const int length1 = x1->high_low_container.size,
8293 length2 = x2->high_low_container.size;
8294 if (0 == length1) {
8295 return roaring_bitmap_copy(r: x2);
8296 }
8297 if (0 == length2) {
8298 return roaring_bitmap_copy(r: x1);
8299 }
8300 roaring_bitmap_t *answer =
8301 roaring_bitmap_create_with_capacity(cap: length1 + length2);
8302 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
8303 int pos1 = 0, pos2 = 0;
8304 uint8_t container_type_1, container_type_2;
8305 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8306 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8307 while (true) {
8308 if (s1 == s2) {
8309 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8310 typecode: &container_type_1);
8311 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8312 typecode: &container_type_2);
8313 void *c = container_xor(c1, type1: container_type_1, c2, type2: container_type_2,
8314 result_type: &container_result_type);
8315
8316 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
8317 ra_append(ra: &answer->high_low_container, s: s1, c,
8318 typecode: container_result_type);
8319 } else {
8320 container_free(container: c, typecode: container_result_type);
8321 }
8322 ++pos1;
8323 ++pos2;
8324 if (pos1 == length1) break;
8325 if (pos2 == length2) break;
8326 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8327 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8328
8329 } else if (s1 < s2) { // s1 < s2
8330 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8331 typecode: &container_type_1);
8332 c1 =
8333 get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1));
8334 if (is_cow(r: x1)) {
8335 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1,
8336 typecode: container_type_1);
8337 }
8338 ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1);
8339 pos1++;
8340 if (pos1 == length1) break;
8341 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8342
8343 } else { // s1 > s2
8344 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8345 typecode: &container_type_2);
8346 c2 =
8347 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
8348 if (is_cow(r: x2)) {
8349 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
8350 typecode: container_type_2);
8351 }
8352 ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2);
8353 pos2++;
8354 if (pos2 == length2) break;
8355 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8356 }
8357 }
8358 if (pos1 == length1) {
8359 ra_append_copy_range(ra: &answer->high_low_container,
8360 sa: &x2->high_low_container, start_index: pos2, end_index: length2,
8361 copy_on_write: is_cow(r: x2));
8362 } else if (pos2 == length2) {
8363 ra_append_copy_range(ra: &answer->high_low_container,
8364 sa: &x1->high_low_container, start_index: pos1, end_index: length1,
8365 copy_on_write: is_cow(r: x1));
8366 }
8367 return answer;
8368}
8369
8370// inplace xor (modifies its first argument).
8371
8372void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
8373 const roaring_bitmap_t *x2) {
8374 assert(x1 != x2);
8375 uint8_t container_result_type = 0;
8376 int length1 = x1->high_low_container.size;
8377 const int length2 = x2->high_low_container.size;
8378
8379 if (0 == length2) return;
8380
8381 if (0 == length1) {
8382 roaring_bitmap_overwrite(dest: x1, src: x2);
8383 return;
8384 }
8385
8386 // XOR can have new containers inserted from x2, but can also
8387 // lose containers when x1 and x2 are nonempty and identical.
8388
8389 int pos1 = 0, pos2 = 0;
8390 uint8_t container_type_1, container_type_2;
8391 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8392 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8393 while (true) {
8394 if (s1 == s2) {
8395 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8396 typecode: &container_type_1);
8397 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1);
8398
8399 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8400 typecode: &container_type_2);
8401 void *c = container_ixor(c1, type1: container_type_1, c2, type2: container_type_2,
8402 result_type: &container_result_type);
8403
8404 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
8405 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c,
8406 typecode: container_result_type);
8407 ++pos1;
8408 } else {
8409 container_free(container: c, typecode: container_result_type);
8410 ra_remove_at_index(ra: &x1->high_low_container, i: pos1);
8411 --length1;
8412 }
8413
8414 ++pos2;
8415 if (pos1 == length1) break;
8416 if (pos2 == length2) break;
8417 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8418 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8419
8420 } else if (s1 < s2) { // s1 < s2
8421 pos1++;
8422 if (pos1 == length1) break;
8423 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8424
8425 } else { // s1 > s2
8426 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8427 typecode: &container_type_2);
8428 c2 =
8429 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
8430 if (is_cow(r: x2)) {
8431 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
8432 typecode: container_type_2);
8433 }
8434
8435 ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2,
8436 typecode: container_type_2);
8437 pos1++;
8438 length1++;
8439 pos2++;
8440 if (pos2 == length2) break;
8441 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8442 }
8443 }
8444 if (pos1 == length1) {
8445 ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container,
8446 start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2));
8447 }
8448}
8449
8450roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
8451 const roaring_bitmap_t *x2) {
8452 uint8_t container_result_type = 0;
8453 const int length1 = x1->high_low_container.size,
8454 length2 = x2->high_low_container.size;
8455 if (0 == length1) {
8456 roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
8457 roaring_bitmap_set_copy_on_write(r: empty_bitmap, cow: is_cow(r: x1) && is_cow(r: x2));
8458 return empty_bitmap;
8459 }
8460 if (0 == length2) {
8461 return roaring_bitmap_copy(r: x1);
8462 }
8463 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: length1);
8464 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
8465
8466 int pos1 = 0, pos2 = 0;
8467 uint8_t container_type_1, container_type_2;
8468 uint16_t s1 = 0;
8469 uint16_t s2 = 0;
8470 while (true) {
8471 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8472 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8473
8474 if (s1 == s2) {
8475 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8476 typecode: &container_type_1);
8477 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8478 typecode: &container_type_2);
8479 void *c =
8480 container_andnot(c1, type1: container_type_1, c2, type2: container_type_2,
8481 result_type: &container_result_type);
8482
8483 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
8484 ra_append(ra: &answer->high_low_container, s: s1, c,
8485 typecode: container_result_type);
8486 } else {
8487 container_free(container: c, typecode: container_result_type);
8488 }
8489 ++pos1;
8490 ++pos2;
8491 if (pos1 == length1) break;
8492 if (pos2 == length2) break;
8493 } else if (s1 < s2) { // s1 < s2
8494 const int next_pos1 =
8495 ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1);
8496 ra_append_copy_range(ra: &answer->high_low_container,
8497 sa: &x1->high_low_container, start_index: pos1, end_index: next_pos1,
8498 copy_on_write: is_cow(r: x1));
8499 // TODO : perhaps some of the copy_on_write should be based on
8500 // answer rather than x1 (more stringent?). Many similar cases
8501 pos1 = next_pos1;
8502 if (pos1 == length1) break;
8503 } else { // s1 > s2
8504 pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2);
8505 if (pos2 == length2) break;
8506 }
8507 }
8508 if (pos2 == length2) {
8509 ra_append_copy_range(ra: &answer->high_low_container,
8510 sa: &x1->high_low_container, start_index: pos1, end_index: length1,
8511 copy_on_write: is_cow(r: x1));
8512 }
8513 return answer;
8514}
8515
8516// inplace andnot (modifies its first argument).
8517
8518void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
8519 const roaring_bitmap_t *x2) {
8520 assert(x1 != x2);
8521
8522 uint8_t container_result_type = 0;
8523 int length1 = x1->high_low_container.size;
8524 const int length2 = x2->high_low_container.size;
8525 int intersection_size = 0;
8526
8527 if (0 == length2) return;
8528
8529 if (0 == length1) {
8530 roaring_bitmap_clear(r: x1);
8531 return;
8532 }
8533
8534 int pos1 = 0, pos2 = 0;
8535 uint8_t container_type_1, container_type_2;
8536 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8537 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8538 while (true) {
8539 if (s1 == s2) {
8540 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
8541 typecode: &container_type_1);
8542 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1);
8543
8544 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
8545 typecode: &container_type_2);
8546 void *c =
8547 container_iandnot(c1, type1: container_type_1, c2, type2: container_type_2,
8548 result_type: &container_result_type);
8549
8550 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
8551 ra_replace_key_and_container_at_index(ra: &x1->high_low_container,
8552 i: intersection_size++, key: s1,
8553 c, typecode: container_result_type);
8554 } else {
8555 container_free(container: c, typecode: container_result_type);
8556 }
8557
8558 ++pos1;
8559 ++pos2;
8560 if (pos1 == length1) break;
8561 if (pos2 == length2) break;
8562 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8563 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8564
8565 } else if (s1 < s2) { // s1 < s2
8566 if (pos1 != intersection_size) {
8567 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container,
8568 i: pos1, typecode: &container_type_1);
8569
8570 ra_replace_key_and_container_at_index(ra: &x1->high_low_container,
8571 i: intersection_size, key: s1, c: c1,
8572 typecode: container_type_1);
8573 }
8574 intersection_size++;
8575 pos1++;
8576 if (pos1 == length1) break;
8577 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
8578
8579 } else { // s1 > s2
8580 pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2);
8581 if (pos2 == length2) break;
8582 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
8583 }
8584 }
8585
8586 if (pos1 < length1) {
8587 // all containers between intersection_size and
8588 // pos1 are junk. However, they have either been moved
8589 // (thus still referenced) or involved in an iandnot
8590 // that will clean up all containers that could not be reused.
8591 // Thus we should not free the junk containers between
8592 // intersection_size and pos1.
8593 if (pos1 > intersection_size) {
8594 // left slide of remaining items
8595 ra_copy_range(ra: &x1->high_low_container, begin: pos1, end: length1,
8596 new_begin: intersection_size);
8597 }
8598 // else current placement is fine
8599 intersection_size += (length1 - pos1);
8600 }
8601 ra_downsize(ra: &x1->high_low_container, new_length: intersection_size);
8602}
8603
8604uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) {
8605 uint64_t card = 0;
8606 for (int i = 0; i < ra->high_low_container.size; ++i)
8607 card += container_get_cardinality(container: ra->high_low_container.containers[i],
8608 typecode: ra->high_low_container.typecodes[i]);
8609 return card;
8610}
8611
8612uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra,
8613 uint64_t range_start,
8614 uint64_t range_end) {
8615 if (range_end > UINT32_MAX) {
8616 range_end = UINT32_MAX + UINT64_C(1);
8617 }
8618 if (range_start >= range_end) {
8619 return 0;
8620 }
8621 range_end--; // make range_end inclusive
8622 // now we have: 0 <= range_start <= range_end <= UINT32_MAX
8623
8624 uint16_t minhb = range_start >> 16;
8625 uint16_t maxhb = range_end >> 16;
8626
8627 uint64_t card = 0;
8628
8629 int i = ra_get_index(ra: &ra->high_low_container, x: minhb);
8630 if (i >= 0) {
8631 if (minhb == maxhb) {
8632 card += container_rank(container: ra->high_low_container.containers[i],
8633 typecode: ra->high_low_container.typecodes[i],
8634 x: range_end & 0xffff);
8635 } else {
8636 card += container_get_cardinality(container: ra->high_low_container.containers[i],
8637 typecode: ra->high_low_container.typecodes[i]);
8638 }
8639 if ((range_start & 0xffff) != 0) {
8640 card -= container_rank(container: ra->high_low_container.containers[i],
8641 typecode: ra->high_low_container.typecodes[i],
8642 x: (range_start & 0xffff) - 1);
8643 }
8644 i++;
8645 } else {
8646 i = -i - 1;
8647 }
8648
8649 for (; i < ra->high_low_container.size; i++) {
8650 uint16_t key = ra->high_low_container.keys[i];
8651 if (key < maxhb) {
8652 card += container_get_cardinality(container: ra->high_low_container.containers[i],
8653 typecode: ra->high_low_container.typecodes[i]);
8654 } else if (key == maxhb) {
8655 card += container_rank(container: ra->high_low_container.containers[i],
8656 typecode: ra->high_low_container.typecodes[i],
8657 x: range_end & 0xffff);
8658 break;
8659 } else {
8660 break;
8661 }
8662 }
8663
8664 return card;
8665}
8666
8667
8668bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) {
8669 return ra->high_low_container.size == 0;
8670}
8671
8672void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) {
8673 ra_to_uint32_array(ra: &ra->high_low_container, ans);
8674}
8675
8676bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) {
8677 return ra_range_uint32_array(ra: &ra->high_low_container, offset, limit, ans);
8678}
8679
8680/** convert array and bitmap containers to run containers when it is more
8681 * efficient;
8682 * also convert from run containers when more space efficient. Returns
8683 * true if the result has at least one run container.
8684*/
8685bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
8686 bool answer = false;
8687 for (int i = 0; i < r->high_low_container.size; i++) {
8688 uint8_t typecode_original, typecode_after;
8689 ra_unshare_container_at_index(
8690 ra: &r->high_low_container, i); // TODO: this introduces extra cloning!
8691 void *c = ra_get_container_at_index(ra: &r->high_low_container, i,
8692 typecode: &typecode_original);
8693 void *c1 = convert_run_optimize(c, typecode_original, typecode_after: &typecode_after);
8694 if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true;
8695 ra_set_container_at_index(ra: &r->high_low_container, i, c: c1,
8696 typecode: typecode_after);
8697 }
8698 return answer;
8699}
8700
8701size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
8702 size_t answer = 0;
8703 for (int i = 0; i < r->high_low_container.size; i++) {
8704 uint8_t typecode_original;
8705 void *c = ra_get_container_at_index(ra: &r->high_low_container, i,
8706 typecode: &typecode_original);
8707 answer += container_shrink_to_fit(container: c, typecode: typecode_original);
8708 }
8709 answer += ra_shrink_to_fit(ra: &r->high_low_container);
8710 return answer;
8711}
8712
8713/**
8714 * Remove run-length encoding even when it is more space efficient
8715 * return whether a change was applied
8716 */
8717bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
8718 bool answer = false;
8719 for (int i = 0; i < r->high_low_container.size; i++) {
8720 uint8_t typecode_original, typecode_after;
8721 void *c = ra_get_container_at_index(ra: &r->high_low_container, i,
8722 typecode: &typecode_original);
8723 if (get_container_type(container: c, type: typecode_original) ==
8724 RUN_CONTAINER_TYPE_CODE) {
8725 answer = true;
8726 if (typecode_original == SHARED_CONTAINER_TYPE_CODE) {
8727 run_container_t *truec =
8728 (run_container_t *)((shared_container_t *)c)->container;
8729 int32_t card = run_container_cardinality(run: truec);
8730 void *c1 = convert_to_bitset_or_array_container(
8731 r: truec, card, resulttype: &typecode_after);
8732 shared_container_free(container: (shared_container_t *)c);// will free the run container as needed
8733 ra_set_container_at_index(ra: &r->high_low_container, i, c: c1,
8734 typecode: typecode_after);
8735
8736 } else {
8737 int32_t card = run_container_cardinality(run: (run_container_t *)c);
8738 void *c1 = convert_to_bitset_or_array_container(
8739 r: (run_container_t *)c, card, resulttype: &typecode_after);
8740 run_container_free(run: (run_container_t *)c);
8741 ra_set_container_at_index(ra: &r->high_low_container, i, c: c1,
8742 typecode: typecode_after);
8743 }
8744 }
8745 }
8746 return answer;
8747}
8748
8749size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) {
8750 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
8751 uint64_t cardinality = roaring_bitmap_get_cardinality(ra);
8752 uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
8753 if (portablesize < sizeasarray) {
8754 buf[0] = SERIALIZATION_CONTAINER;
8755 return roaring_bitmap_portable_serialize(ra, buf: buf + 1) + 1;
8756 } else {
8757 buf[0] = SERIALIZATION_ARRAY_UINT32;
8758 memcpy(dest: buf + 1, src: &cardinality, n: sizeof(uint32_t));
8759 roaring_bitmap_to_uint32_array(
8760 ra, ans: (uint32_t *)(buf + 1 + sizeof(uint32_t)));
8761 return 1 + (size_t)sizeasarray;
8762 }
8763}
8764
8765size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) {
8766 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
8767 uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) +
8768 sizeof(uint32_t);
8769 return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
8770}
8771
8772size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) {
8773 return ra_portable_size_in_bytes(ra: &ra->high_low_container);
8774}
8775
8776
8777roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
8778 roaring_bitmap_t *ans =
8779 (roaring_bitmap_t *)malloc(size: sizeof(roaring_bitmap_t));
8780 if (ans == NULL) {
8781 return NULL;
8782 }
8783 size_t bytesread;
8784 bool is_ok = ra_portable_deserialize(ra: &ans->high_low_container, buf, maxbytes, readbytes: &bytesread);
8785 if(is_ok) assert(bytesread <= maxbytes);
8786 roaring_bitmap_set_copy_on_write(r: ans, false);
8787 if (!is_ok) {
8788 free(ptr: ans);
8789 return NULL;
8790 }
8791 return ans;
8792}
8793
8794roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
8795 return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
8796}
8797
8798
/**
 * Returns the result of ra_portable_deserialize_size() for `buf`, limited
 * to `maxbytes`.
 */
size_t roaring_bitmap_portable_deserialize_size(const char *buf,
                                                size_t maxbytes) {
    const size_t nbytes = ra_portable_deserialize_size(buf, maxbytes);
    return nbytes;
}
8802
8803
8804size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra,
8805 char *buf) {
8806 return ra_portable_serialize(ra: &ra->high_low_container, buf);
8807}
8808
8809roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
8810 const char *bufaschar = (const char *)buf;
8811 if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) {
8812 /* This looks like a compressed set of uint32_t elements */
8813 uint32_t card;
8814 memcpy(dest: &card, src: bufaschar + 1, n: sizeof(uint32_t));
8815 const uint32_t *elems =
8816 (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
8817
8818 return roaring_bitmap_of_ptr(n_args: card, vals: elems);
8819 } else if (bufaschar[0] == SERIALIZATION_CONTAINER) {
8820 return roaring_bitmap_portable_deserialize(buf: bufaschar + 1);
8821 } else
8822 return (NULL);
8823}
8824
8825bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
8826 void *ptr) {
8827 for (int i = 0; i < ra->high_low_container.size; ++i)
8828 if (!container_iterate(container: ra->high_low_container.containers[i],
8829 typecode: ra->high_low_container.typecodes[i],
8830 base: ((uint32_t)ra->high_low_container.keys[i]) << 16,
8831 iterator, ptr)) {
8832 return false;
8833 }
8834 return true;
8835}
8836
8837bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator,
8838 uint64_t high_bits, void *ptr) {
8839 for (int i = 0; i < ra->high_low_container.size; ++i)
8840 if (!container_iterate64(
8841 container: ra->high_low_container.containers[i],
8842 typecode: ra->high_low_container.typecodes[i],
8843 base: ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator,
8844 high_bits, ptr)) {
8845 return false;
8846 }
8847 return true;
8848}
8849
8850/****
8851* begin roaring_uint32_iterator_t
8852*****/
8853
8854// Partially initializes the roaring iterator when it begins looking at
8855// a new container.
8856static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
8857 newit->in_container_index = 0;
8858 newit->run_index = 0;
8859 newit->current_value = 0;
8860 if (newit->container_index >= newit->parent->high_low_container.size ||
8861 newit->container_index < 0) {
8862 newit->current_value = UINT32_MAX;
8863 return (newit->has_value = false);
8864 }
8865 // assume not empty
8866 newit->has_value = true;
8867 // we precompute container, typecode and highbits so that successive
8868 // iterators do not have to grab them from odd memory locations
8869 // and have to worry about the (easily predicted) container_unwrap_shared
8870 // call.
8871 newit->container =
8872 newit->parent->high_low_container.containers[newit->container_index];
8873 newit->typecode =
8874 newit->parent->high_low_container.typecodes[newit->container_index];
8875 newit->highbits =
8876 ((uint32_t)
8877 newit->parent->high_low_container.keys[newit->container_index])
8878 << 16;
8879 newit->container =
8880 container_unwrap_shared(candidate_shared_container: newit->container, type: &(newit->typecode));
8881 return newit->has_value;
8882}
8883
8884static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
8885 if (!iter_new_container_partial_init(newit))
8886 return newit->has_value;
8887
8888 uint32_t wordindex;
8889 uint64_t word; // used for bitsets
8890 switch (newit->typecode) {
8891 case BITSET_CONTAINER_TYPE_CODE:
8892 wordindex = 0;
8893 while ((word = ((const bitset_container_t *)(newit->container))
8894 ->array[wordindex]) == 0)
8895 wordindex++; // advance
8896 // here "word" is non-zero
8897 newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
8898 newit->current_value = newit->highbits | newit->in_container_index;
8899 break;
8900 case ARRAY_CONTAINER_TYPE_CODE:
8901 newit->current_value =
8902 newit->highbits |
8903 ((const array_container_t *)(newit->container))->array[0];
8904 break;
8905 case RUN_CONTAINER_TYPE_CODE:
8906 newit->current_value =
8907 newit->highbits |
8908 (((const run_container_t *)(newit->container))->runs[0].value);
8909 break;
8910 default:
8911 // if this ever happens, bug!
8912 assert(false);
8913 } // switch (typecode)
8914 return true;
8915}
8916
8917static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
8918 if (!iter_new_container_partial_init(newit))
8919 return newit->has_value;
8920
8921 switch(newit->typecode) {
8922 case BITSET_CONTAINER_TYPE_CODE: {
8923 uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
8924 uint64_t word;
8925 const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
8926 while ((word = bitset_container->array[wordindex]) == 0)
8927 --wordindex;
8928
8929 int num_leading_zeros = __builtin_clzll(word);
8930 newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
8931 newit->current_value = newit->highbits | newit->in_container_index;
8932 break;
8933 }
8934 case ARRAY_CONTAINER_TYPE_CODE: {
8935 const array_container_t* array_container = (const array_container_t*)newit->container;
8936 newit->in_container_index = array_container->cardinality - 1;
8937 newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
8938 break;
8939 }
8940 case RUN_CONTAINER_TYPE_CODE: {
8941 const run_container_t* run_container = (const run_container_t*)newit->container;
8942 newit->run_index = run_container->n_runs - 1;
8943 const rle16_t* last_run = &run_container->runs[newit->run_index];
8944 newit->current_value = newit->highbits | (last_run->value + last_run->length);
8945 break;
8946 }
8947 default:
8948 // if this ever happens, bug!
8949 assert(false);
8950 }
8951 return true;
8952}
8953
8954// prerequesite: the value should be in range of the container
8955static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
8956 // Don't have to check return value because of prerequisite
8957 iter_new_container_partial_init(newit);
8958 uint16_t lb = val & 0xFFFF;
8959
8960 switch (newit->typecode) {
8961 case BITSET_CONTAINER_TYPE_CODE:
8962 newit->in_container_index = bitset_container_index_equalorlarger(container: (const bitset_container_t *)(newit->container), x: lb);
8963 newit->current_value = newit->highbits | newit->in_container_index;
8964 break;
8965 case ARRAY_CONTAINER_TYPE_CODE:
8966 newit->in_container_index = array_container_index_equalorlarger(arr: (const array_container_t *)(newit->container), x: lb);
8967 newit->current_value =
8968 newit->highbits |
8969 ((const array_container_t *)(newit->container))->array[newit->in_container_index];
8970 break;
8971 case RUN_CONTAINER_TYPE_CODE:
8972 newit->run_index = run_container_index_equalorlarger(arr: (const run_container_t *)(newit->container), x: lb);
8973 if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) {
8974 newit->current_value = val;
8975 } else {
8976 newit->current_value =
8977 newit->highbits |
8978 (((const run_container_t *)(newit->container))->runs[newit->run_index].value);
8979 }
8980 break;
8981 default:
8982 // if this ever happens, bug!
8983 assert(false);
8984 } // switch (typecode)
8985 return true;
8986}
8987
8988void roaring_init_iterator(const roaring_bitmap_t *ra,
8989 roaring_uint32_iterator_t *newit) {
8990 newit->parent = ra;
8991 newit->container_index = 0;
8992 newit->has_value = loadfirstvalue(newit);
8993}
8994
8995void roaring_init_iterator_last(const roaring_bitmap_t *ra,
8996 roaring_uint32_iterator_t *newit) {
8997 newit->parent = ra;
8998 newit->container_index = newit->parent->high_low_container.size - 1;
8999 newit->has_value = loadlastvalue(newit);
9000}
9001
9002roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) {
9003 roaring_uint32_iterator_t *newit =
9004 (roaring_uint32_iterator_t *)malloc(size: sizeof(roaring_uint32_iterator_t));
9005 if (newit == NULL) return NULL;
9006 roaring_init_iterator(ra, newit);
9007 return newit;
9008}
9009
9010roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
9011 const roaring_uint32_iterator_t *it) {
9012 roaring_uint32_iterator_t *newit =
9013 (roaring_uint32_iterator_t *)malloc(size: sizeof(roaring_uint32_iterator_t));
9014 memcpy(dest: newit, src: it, n: sizeof(roaring_uint32_iterator_t));
9015 return newit;
9016}
9017
9018bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
9019 uint16_t hb = val >> 16;
9020 const int i = ra_get_index(ra: & it->parent->high_low_container, x: hb);
9021 if (i >= 0) {
9022 uint32_t lowvalue = container_maximum(container: it->parent->high_low_container.containers[i], typecode: it->parent->high_low_container.typecodes[i]);
9023 uint16_t lb = val & 0xFFFF;
9024 if(lowvalue < lb ) {
9025 it->container_index = i+1; // will have to load first value of next container
9026 } else {// the value is necessarily within the range of the container
9027 it->container_index = i;
9028 it->has_value = loadfirstvalue_largeorequal(newit: it, val);
9029 return it->has_value;
9030 }
9031 } else {
9032 // there is no matching, so we are going for the next container
9033 it->container_index = -i-1;
9034 }
9035 it->has_value = loadfirstvalue(newit: it);
9036 return it->has_value;
9037}
9038
9039
9040bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
9041 if (it->container_index >= it->parent->high_low_container.size) {
9042 return (it->has_value = false);
9043 }
9044 if (it->container_index < 0) {
9045 it->container_index = 0;
9046 return (it->has_value = loadfirstvalue(newit: it));
9047 }
9048
9049 uint32_t wordindex; // used for bitsets
9050 uint64_t word; // used for bitsets
9051 switch (it->typecode) {
9052 case BITSET_CONTAINER_TYPE_CODE:
9053 it->in_container_index++;
9054 wordindex = it->in_container_index / 64;
9055 if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
9056 word = ((const bitset_container_t *)(it->container))
9057 ->array[wordindex] &
9058 (UINT64_MAX << (it->in_container_index % 64));
9059 // next part could be optimized/simplified
9060 while ((word == 0) &&
9061 (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
9062 wordindex++;
9063 word = ((const bitset_container_t *)(it->container))
9064 ->array[wordindex];
9065 }
9066 if (word != 0) {
9067 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
9068 it->current_value = it->highbits | it->in_container_index;
9069 return (it->has_value = true);
9070 }
9071 break;
9072 case ARRAY_CONTAINER_TYPE_CODE:
9073 it->in_container_index++;
9074 if (it->in_container_index <
9075 ((const array_container_t *)(it->container))->cardinality) {
9076 it->current_value = it->highbits |
9077 ((const array_container_t *)(it->container))
9078 ->array[it->in_container_index];
9079 return (it->has_value = true);
9080 }
9081 break;
9082 case RUN_CONTAINER_TYPE_CODE: {
9083 if(it->current_value == UINT32_MAX) {
9084 return (it->has_value = false); // without this, we risk an overflow to zero
9085 }
9086
9087 const run_container_t* run_container = (const run_container_t*)it->container;
9088 if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value +
9089 run_container->runs[it->run_index].length))) {
9090 return (it->has_value = true);
9091 }
9092
9093 if (++it->run_index < run_container->n_runs) {
9094 // Assume the run has a value
9095 it->current_value = it->highbits | run_container->runs[it->run_index].value;
9096 return (it->has_value = true);
9097 }
9098 break;
9099 }
9100 default:
9101 // if this ever happens, bug!
9102 assert(false);
9103 } // switch (typecode)
9104 // moving to next container
9105 it->container_index++;
9106 return (it->has_value = loadfirstvalue(newit: it));
9107}
9108
9109bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
9110 if (it->container_index < 0) {
9111 return (it->has_value = false);
9112 }
9113 if (it->container_index >= it->parent->high_low_container.size) {
9114 it->container_index = it->parent->high_low_container.size - 1;
9115 return (it->has_value = loadlastvalue(newit: it));
9116 }
9117
9118 switch (it->typecode) {
9119 case BITSET_CONTAINER_TYPE_CODE: {
9120 if (--it->in_container_index < 0)
9121 break;
9122
9123 const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
9124 int32_t wordindex = it->in_container_index / 64;
9125 uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
9126
9127 while (word == 0 && --wordindex >= 0) {
9128 word = bitset_container->array[wordindex];
9129 }
9130 if (word == 0)
9131 break;
9132
9133 int num_leading_zeros = __builtin_clzll(word);
9134 it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
9135 it->current_value = it->highbits | it->in_container_index;
9136 return (it->has_value = true);
9137 }
9138 case ARRAY_CONTAINER_TYPE_CODE: {
9139 if (--it->in_container_index < 0)
9140 break;
9141
9142 const array_container_t* array_container = (const array_container_t*)it->container;
9143 it->current_value = it->highbits | array_container->array[it->in_container_index];
9144 return (it->has_value = true);
9145 }
9146 case RUN_CONTAINER_TYPE_CODE: {
9147 if(it->current_value == 0)
9148 return (it->has_value = false);
9149
9150 const run_container_t* run_container = (const run_container_t*)it->container;
9151 if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
9152 return (it->has_value = true);
9153 }
9154
9155 if (--it->run_index < 0)
9156 break;
9157
9158 it->current_value = it->highbits | (run_container->runs[it->run_index].value +
9159 run_container->runs[it->run_index].length);
9160 return (it->has_value = true);
9161 }
9162 default:
9163 // if this ever happens, bug!
9164 assert(false);
9165 } // switch (typecode)
9166
9167 // moving to previous container
9168 it->container_index--;
9169 return (it->has_value = loadlastvalue(newit: it));
9170}
9171
9172uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
9173 uint32_t ret = 0;
9174 uint32_t num_values;
9175 uint32_t wordindex; // used for bitsets
9176 uint64_t word; // used for bitsets
9177 const array_container_t* acont; //TODO remove
9178 const run_container_t* rcont; //TODO remove
9179 const bitset_container_t* bcont; //TODO remove
9180
9181 while (it->has_value && ret < count) {
9182 switch (it->typecode) {
9183 case BITSET_CONTAINER_TYPE_CODE:
9184 bcont = (const bitset_container_t*)(it->container);
9185 wordindex = it->in_container_index / 64;
9186 word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
9187 do {
9188 while (word != 0 && ret < count) {
9189 buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
9190 word = word & (word - 1);
9191 buf++;
9192 ret++;
9193 }
9194 while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
9195 wordindex++;
9196 word = bcont->array[wordindex];
9197 }
9198 } while (word != 0 && ret < count);
9199 it->has_value = (word != 0);
9200 if (it->has_value) {
9201 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
9202 it->current_value = it->highbits | it->in_container_index;
9203 }
9204 break;
9205 case ARRAY_CONTAINER_TYPE_CODE:
9206 acont = (const array_container_t *)(it->container);
9207 num_values = minimum_uint32(a: acont->cardinality - it->in_container_index, b: count - ret);
9208 for (uint32_t i = 0; i < num_values; i++) {
9209 buf[i] = it->highbits | acont->array[it->in_container_index + i];
9210 }
9211 buf += num_values;
9212 ret += num_values;
9213 it->in_container_index += num_values;
9214 it->has_value = (it->in_container_index < acont->cardinality);
9215 if (it->has_value) {
9216 it->current_value = it->highbits | acont->array[it->in_container_index];
9217 }
9218 break;
9219 case RUN_CONTAINER_TYPE_CODE:
9220 rcont = (const run_container_t*)(it->container);
9221 //"in_run_index" name is misleading, read it as "max_value_in_current_run"
9222 do {
9223 uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
9224 num_values = minimum_uint32(a: largest_run_value - it->current_value + 1, b: count - ret);
9225 for (uint32_t i = 0; i < num_values; i++) {
9226 buf[i] = it->current_value + i;
9227 }
9228 it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
9229 buf += num_values;
9230 ret += num_values;
9231
9232 if (it->current_value > largest_run_value || it->current_value == 0) {
9233 it->run_index++;
9234 if (it->run_index < rcont->n_runs) {
9235 it->current_value = it->highbits | rcont->runs[it->run_index].value;
9236 } else {
9237 it->has_value = false;
9238 }
9239 }
9240 } while ((ret < count) && it->has_value);
9241 break;
9242 default:
9243 assert(false);
9244 }
9245 if (it->has_value) {
9246 assert(ret == count);
9247 return ret;
9248 }
9249 it->container_index++;
9250 it->has_value = loadfirstvalue(newit: it);
9251 }
9252 return ret;
9253}
9254
9255
9256
9257void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(ptr: it); }
9258
9259/****
9260* end of roaring_uint32_iterator_t
9261*****/
9262
9263bool roaring_bitmap_equals(const roaring_bitmap_t *ra1,
9264 const roaring_bitmap_t *ra2) {
9265 if (ra1->high_low_container.size != ra2->high_low_container.size) {
9266 return false;
9267 }
9268 for (int i = 0; i < ra1->high_low_container.size; ++i) {
9269 if (ra1->high_low_container.keys[i] !=
9270 ra2->high_low_container.keys[i]) {
9271 return false;
9272 }
9273 }
9274 for (int i = 0; i < ra1->high_low_container.size; ++i) {
9275 bool areequal = container_equals(c1: ra1->high_low_container.containers[i],
9276 type1: ra1->high_low_container.typecodes[i],
9277 c2: ra2->high_low_container.containers[i],
9278 type2: ra2->high_low_container.typecodes[i]);
9279 if (!areequal) {
9280 return false;
9281 }
9282 }
9283 return true;
9284}
9285
9286bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1,
9287 const roaring_bitmap_t *ra2) {
9288 const int length1 = ra1->high_low_container.size,
9289 length2 = ra2->high_low_container.size;
9290
9291 int pos1 = 0, pos2 = 0;
9292
9293 while (pos1 < length1 && pos2 < length2) {
9294 const uint16_t s1 = ra_get_key_at_index(ra: &ra1->high_low_container, i: pos1);
9295 const uint16_t s2 = ra_get_key_at_index(ra: &ra2->high_low_container, i: pos2);
9296
9297 if (s1 == s2) {
9298 uint8_t container_type_1, container_type_2;
9299 void *c1 = ra_get_container_at_index(ra: &ra1->high_low_container, i: pos1,
9300 typecode: &container_type_1);
9301 void *c2 = ra_get_container_at_index(ra: &ra2->high_low_container, i: pos2,
9302 typecode: &container_type_2);
9303 bool subset =
9304 container_is_subset(c1, type1: container_type_1, c2, type2: container_type_2);
9305 if (!subset) return false;
9306 ++pos1;
9307 ++pos2;
9308 } else if (s1 < s2) { // s1 < s2
9309 return false;
9310 } else { // s1 > s2
9311 pos2 = ra_advance_until(ra: &ra2->high_low_container, x: s1, pos: pos2);
9312 }
9313 }
9314 if (pos1 == length1)
9315 return true;
9316 else
9317 return false;
9318}
9319
9320static void insert_flipped_container(roaring_array_t *ans_arr,
9321 const roaring_array_t *x1_arr, uint16_t hb,
9322 uint16_t lb_start, uint16_t lb_end) {
9323 const int i = ra_get_index(ra: x1_arr, x: hb);
9324 const int j = ra_get_index(ra: ans_arr, x: hb);
9325 uint8_t ctype_in, ctype_out;
9326 void *flipped_container = NULL;
9327 if (i >= 0) {
9328 void *container_to_flip =
9329 ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in);
9330 flipped_container =
9331 container_not_range(c: container_to_flip, typ: ctype_in, range_start: (uint32_t)lb_start,
9332 range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out);
9333
9334 if (container_get_cardinality(container: flipped_container, typecode: ctype_out))
9335 ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container,
9336 typecode: ctype_out);
9337 else {
9338 container_free(container: flipped_container, typecode: ctype_out);
9339 }
9340 } else {
9341 flipped_container = container_range_of_ones(
9342 range_start: (uint32_t)lb_start, range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out);
9343 ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container,
9344 typecode: ctype_out);
9345 }
9346}
9347
9348static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
9349 uint16_t lb_start, uint16_t lb_end) {
9350 const int i = ra_get_index(ra: x1_arr, x: hb);
9351 uint8_t ctype_in, ctype_out;
9352 void *flipped_container = NULL;
9353 if (i >= 0) {
9354 void *container_to_flip =
9355 ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in);
9356 flipped_container = container_inot_range(
9357 c: container_to_flip, typ: ctype_in, range_start: (uint32_t)lb_start,
9358 range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out);
9359 // if a new container was created, the old one was already freed
9360 if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) {
9361 ra_set_container_at_index(ra: x1_arr, i, c: flipped_container, typecode: ctype_out);
9362 } else {
9363 container_free(container: flipped_container, typecode: ctype_out);
9364 ra_remove_at_index(ra: x1_arr, i);
9365 }
9366
9367 } else {
9368 flipped_container = container_range_of_ones(
9369 range_start: (uint32_t)lb_start, range_end: (uint32_t)(lb_end + 1), result_type: &ctype_out);
9370 ra_insert_new_key_value_at(ra: x1_arr, i: -i - 1, key: hb, container: flipped_container,
9371 typecode: ctype_out);
9372 }
9373}
9374
9375static void insert_fully_flipped_container(roaring_array_t *ans_arr,
9376 const roaring_array_t *x1_arr,
9377 uint16_t hb) {
9378 const int i = ra_get_index(ra: x1_arr, x: hb);
9379 const int j = ra_get_index(ra: ans_arr, x: hb);
9380 uint8_t ctype_in, ctype_out;
9381 void *flipped_container = NULL;
9382 if (i >= 0) {
9383 void *container_to_flip =
9384 ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in);
9385 flipped_container =
9386 container_not(c: container_to_flip, typ: ctype_in, result_type: &ctype_out);
9387 if (container_get_cardinality(container: flipped_container, typecode: ctype_out))
9388 ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container,
9389 typecode: ctype_out);
9390 else {
9391 container_free(container: flipped_container, typecode: ctype_out);
9392 }
9393 } else {
9394 flipped_container = container_range_of_ones(range_start: 0U, range_end: 0x10000U, result_type: &ctype_out);
9395 ra_insert_new_key_value_at(ra: ans_arr, i: -j - 1, key: hb, container: flipped_container,
9396 typecode: ctype_out);
9397 }
9398}
9399
9400static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
9401 const int i = ra_get_index(ra: x1_arr, x: hb);
9402 uint8_t ctype_in, ctype_out;
9403 void *flipped_container = NULL;
9404 if (i >= 0) {
9405 void *container_to_flip =
9406 ra_get_container_at_index(ra: x1_arr, i, typecode: &ctype_in);
9407 flipped_container =
9408 container_inot(c: container_to_flip, typ: ctype_in, result_type: &ctype_out);
9409
9410 if (container_get_cardinality(container: flipped_container, typecode: ctype_out)) {
9411 ra_set_container_at_index(ra: x1_arr, i, c: flipped_container, typecode: ctype_out);
9412 } else {
9413 container_free(container: flipped_container, typecode: ctype_out);
9414 ra_remove_at_index(ra: x1_arr, i);
9415 }
9416
9417 } else {
9418 flipped_container = container_range_of_ones(range_start: 0U, range_end: 0x10000U, result_type: &ctype_out);
9419 ra_insert_new_key_value_at(ra: x1_arr, i: -i - 1, key: hb, container: flipped_container,
9420 typecode: ctype_out);
9421 }
9422}
9423
9424roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
9425 uint64_t range_start,
9426 uint64_t range_end) {
9427 if (range_start >= range_end) {
9428 return roaring_bitmap_copy(r: x1);
9429 }
9430 if(range_end >= UINT64_C(0x100000000)) {
9431 range_end = UINT64_C(0x100000000);
9432 }
9433
9434 roaring_bitmap_t *ans = roaring_bitmap_create();
9435 roaring_bitmap_set_copy_on_write(r: ans, cow: is_cow(r: x1));
9436
9437 uint16_t hb_start = (uint16_t)(range_start >> 16);
9438 const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
9439 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
9440 const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
9441
9442 ra_append_copies_until(ra: &ans->high_low_container, sa: &x1->high_low_container,
9443 stopping_key: hb_start, copy_on_write: is_cow(r: x1));
9444 if (hb_start == hb_end) {
9445 insert_flipped_container(ans_arr: &ans->high_low_container,
9446 x1_arr: &x1->high_low_container, hb: hb_start, lb_start,
9447 lb_end);
9448 } else {
9449 // start and end containers are distinct
9450 if (lb_start > 0) {
9451 // handle first (partial) container
9452 insert_flipped_container(ans_arr: &ans->high_low_container,
9453 x1_arr: &x1->high_low_container, hb: hb_start,
9454 lb_start, lb_end: 0xFFFF);
9455 ++hb_start; // for the full containers. Can't wrap.
9456 }
9457
9458 if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
9459
9460 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
9461 insert_fully_flipped_container(ans_arr: &ans->high_low_container,
9462 x1_arr: &x1->high_low_container, hb);
9463 }
9464
9465 // handle a partial final container
9466 if (lb_end != 0xFFFF) {
9467 insert_flipped_container(ans_arr: &ans->high_low_container,
9468 x1_arr: &x1->high_low_container, hb: hb_end + 1, lb_start: 0,
9469 lb_end);
9470 ++hb_end;
9471 }
9472 }
9473 ra_append_copies_after(ra: &ans->high_low_container, sa: &x1->high_low_container,
9474 before_start: hb_end, copy_on_write: is_cow(r: x1));
9475 return ans;
9476}
9477
9478void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
9479 uint64_t range_end) {
9480 if (range_start >= range_end) {
9481 return; // empty range
9482 }
9483 if(range_end >= UINT64_C(0x100000000)) {
9484 range_end = UINT64_C(0x100000000);
9485 }
9486
9487 uint16_t hb_start = (uint16_t)(range_start >> 16);
9488 const uint16_t lb_start = (uint16_t)range_start;
9489 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
9490 const uint16_t lb_end = (uint16_t)(range_end - 1);
9491
9492 if (hb_start == hb_end) {
9493 inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_start, lb_start,
9494 lb_end);
9495 } else {
9496 // start and end containers are distinct
9497 if (lb_start > 0) {
9498 // handle first (partial) container
9499 inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_start, lb_start,
9500 lb_end: 0xFFFF);
9501 ++hb_start; // for the full containers. Can't wrap.
9502 }
9503
9504 if (lb_end != 0xFFFF) --hb_end;
9505
9506 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
9507 inplace_fully_flip_container(x1_arr: &x1->high_low_container, hb);
9508 }
9509 // handle a partial final container
9510 if (lb_end != 0xFFFF) {
9511 inplace_flip_container(x1_arr: &x1->high_low_container, hb: hb_end + 1, lb_start: 0,
9512 lb_end);
9513 ++hb_end;
9514 }
9515 }
9516}
9517
9518roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
9519 const roaring_bitmap_t *x2,
9520 const bool bitsetconversion) {
9521 uint8_t container_result_type = 0;
9522 const int length1 = x1->high_low_container.size,
9523 length2 = x2->high_low_container.size;
9524 if (0 == length1) {
9525 return roaring_bitmap_copy(r: x2);
9526 }
9527 if (0 == length2) {
9528 return roaring_bitmap_copy(r: x1);
9529 }
9530 roaring_bitmap_t *answer =
9531 roaring_bitmap_create_with_capacity(cap: length1 + length2);
9532 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
9533 int pos1 = 0, pos2 = 0;
9534 uint8_t container_type_1, container_type_2;
9535 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9536 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9537 while (true) {
9538 if (s1 == s2) {
9539 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9540 typecode: &container_type_1);
9541 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9542 typecode: &container_type_2);
9543 void *c;
9544 if (bitsetconversion && (get_container_type(container: c1, type: container_type_1) !=
9545 BITSET_CONTAINER_TYPE_CODE) &&
9546 (get_container_type(container: c2, type: container_type_2) !=
9547 BITSET_CONTAINER_TYPE_CODE)) {
9548 void *newc1 =
9549 container_mutable_unwrap_shared(candidate_shared_container: c1, type: &container_type_1);
9550 newc1 = container_to_bitset(container: newc1, typecode: container_type_1);
9551 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
9552 c = container_lazy_ior(c1: newc1, type1: container_type_1, c2,
9553 type2: container_type_2,
9554 result_type: &container_result_type);
9555 if (c != newc1) { // should not happen
9556 container_free(container: newc1, typecode: container_type_1);
9557 }
9558 } else {
9559 c = container_lazy_or(c1, type1: container_type_1, c2,
9560 type2: container_type_2, result_type: &container_result_type);
9561 }
9562 // since we assume that the initial containers are non-empty,
9563 // the
9564 // result here
9565 // can only be non-empty
9566 ra_append(ra: &answer->high_low_container, s: s1, c,
9567 typecode: container_result_type);
9568 ++pos1;
9569 ++pos2;
9570 if (pos1 == length1) break;
9571 if (pos2 == length2) break;
9572 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9573 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9574
9575 } else if (s1 < s2) { // s1 < s2
9576 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9577 typecode: &container_type_1);
9578 c1 =
9579 get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1));
9580 if (is_cow(r: x1)) {
9581 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1,
9582 typecode: container_type_1);
9583 }
9584 ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1);
9585 pos1++;
9586 if (pos1 == length1) break;
9587 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9588
9589 } else { // s1 > s2
9590 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9591 typecode: &container_type_2);
9592 c2 =
9593 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
9594 if (is_cow(r: x2)) {
9595 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
9596 typecode: container_type_2);
9597 }
9598 ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2);
9599 pos2++;
9600 if (pos2 == length2) break;
9601 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9602 }
9603 }
9604 if (pos1 == length1) {
9605 ra_append_copy_range(ra: &answer->high_low_container,
9606 sa: &x2->high_low_container, start_index: pos2, end_index: length2,
9607 copy_on_write: is_cow(r: x2));
9608 } else if (pos2 == length2) {
9609 ra_append_copy_range(ra: &answer->high_low_container,
9610 sa: &x1->high_low_container, start_index: pos1, end_index: length1,
9611 copy_on_write: is_cow(r: x1));
9612 }
9613 return answer;
9614}
9615
9616void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
9617 const roaring_bitmap_t *x2,
9618 const bool bitsetconversion) {
9619 uint8_t container_result_type = 0;
9620 int length1 = x1->high_low_container.size;
9621 const int length2 = x2->high_low_container.size;
9622
9623 if (0 == length2) return;
9624
9625 if (0 == length1) {
9626 roaring_bitmap_overwrite(dest: x1, src: x2);
9627 return;
9628 }
9629 int pos1 = 0, pos2 = 0;
9630 uint8_t container_type_1, container_type_2;
9631 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9632 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9633 while (true) {
9634 if (s1 == s2) {
9635 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9636 typecode: &container_type_1);
9637 if (!container_is_full(container: c1, typecode: container_type_1)) {
9638 if ((bitsetconversion == false) ||
9639 (get_container_type(container: c1, type: container_type_1) ==
9640 BITSET_CONTAINER_TYPE_CODE)) {
9641 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1);
9642 } else {
9643 // convert to bitset
9644 void *oldc1 = c1;
9645 uint8_t oldt1 = container_type_1;
9646 c1 = container_mutable_unwrap_shared(candidate_shared_container: c1, type: &container_type_1);
9647 c1 = container_to_bitset(container: c1, typecode: container_type_1);
9648 container_free(container: oldc1, typecode: oldt1);
9649 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
9650 }
9651
9652 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container,
9653 i: pos2, typecode: &container_type_2);
9654 void *c = container_lazy_ior(c1, type1: container_type_1, c2,
9655 type2: container_type_2,
9656 result_type: &container_result_type);
9657 if (c !=
9658 c1) { // in this instance a new container was created, and
9659 // we need to free the old one
9660 container_free(container: c1, typecode: container_type_1);
9661 }
9662
9663 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c,
9664 typecode: container_result_type);
9665 }
9666 ++pos1;
9667 ++pos2;
9668 if (pos1 == length1) break;
9669 if (pos2 == length2) break;
9670 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9671 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9672
9673 } else if (s1 < s2) { // s1 < s2
9674 pos1++;
9675 if (pos1 == length1) break;
9676 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9677
9678 } else { // s1 > s2
9679 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9680 typecode: &container_type_2);
9681 // void *c2_clone = container_clone(c2, container_type_2);
9682 c2 =
9683 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
9684 if (is_cow(r: x2)) {
9685 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
9686 typecode: container_type_2);
9687 }
9688 ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2,
9689 typecode: container_type_2);
9690 pos1++;
9691 length1++;
9692 pos2++;
9693 if (pos2 == length2) break;
9694 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9695 }
9696 }
9697 if (pos1 == length1) {
9698 ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container,
9699 start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2));
9700 }
9701}
9702
9703roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
9704 const roaring_bitmap_t *x2) {
9705 uint8_t container_result_type = 0;
9706 const int length1 = x1->high_low_container.size,
9707 length2 = x2->high_low_container.size;
9708 if (0 == length1) {
9709 return roaring_bitmap_copy(r: x2);
9710 }
9711 if (0 == length2) {
9712 return roaring_bitmap_copy(r: x1);
9713 }
9714 roaring_bitmap_t *answer =
9715 roaring_bitmap_create_with_capacity(cap: length1 + length2);
9716 roaring_bitmap_set_copy_on_write(r: answer, cow: is_cow(r: x1) && is_cow(r: x2));
9717 int pos1 = 0, pos2 = 0;
9718 uint8_t container_type_1, container_type_2;
9719 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9720 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9721 while (true) {
9722 if (s1 == s2) {
9723 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9724 typecode: &container_type_1);
9725 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9726 typecode: &container_type_2);
9727 void *c =
9728 container_lazy_xor(c1, type1: container_type_1, c2, type2: container_type_2,
9729 result_type: &container_result_type);
9730
9731 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
9732 ra_append(ra: &answer->high_low_container, s: s1, c,
9733 typecode: container_result_type);
9734 } else {
9735 container_free(container: c, typecode: container_result_type);
9736 }
9737
9738 ++pos1;
9739 ++pos2;
9740 if (pos1 == length1) break;
9741 if (pos2 == length2) break;
9742 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9743 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9744
9745 } else if (s1 < s2) { // s1 < s2
9746 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9747 typecode: &container_type_1);
9748 c1 =
9749 get_copy_of_container(container: c1, typecode: &container_type_1, copy_on_write: is_cow(r: x1));
9750 if (is_cow(r: x1)) {
9751 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c: c1,
9752 typecode: container_type_1);
9753 }
9754 ra_append(ra: &answer->high_low_container, s: s1, c: c1, typecode: container_type_1);
9755 pos1++;
9756 if (pos1 == length1) break;
9757 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9758
9759 } else { // s1 > s2
9760 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9761 typecode: &container_type_2);
9762 c2 =
9763 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
9764 if (is_cow(r: x2)) {
9765 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
9766 typecode: container_type_2);
9767 }
9768 ra_append(ra: &answer->high_low_container, s: s2, c: c2, typecode: container_type_2);
9769 pos2++;
9770 if (pos2 == length2) break;
9771 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9772 }
9773 }
9774 if (pos1 == length1) {
9775 ra_append_copy_range(ra: &answer->high_low_container,
9776 sa: &x2->high_low_container, start_index: pos2, end_index: length2,
9777 copy_on_write: is_cow(r: x2));
9778 } else if (pos2 == length2) {
9779 ra_append_copy_range(ra: &answer->high_low_container,
9780 sa: &x1->high_low_container, start_index: pos1, end_index: length1,
9781 copy_on_write: is_cow(r: x1));
9782 }
9783 return answer;
9784}
9785
9786void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
9787 const roaring_bitmap_t *x2) {
9788 assert(x1 != x2);
9789 uint8_t container_result_type = 0;
9790 int length1 = x1->high_low_container.size;
9791 const int length2 = x2->high_low_container.size;
9792
9793 if (0 == length2) return;
9794
9795 if (0 == length1) {
9796 roaring_bitmap_overwrite(dest: x1, src: x2);
9797 return;
9798 }
9799 int pos1 = 0, pos2 = 0;
9800 uint8_t container_type_1, container_type_2;
9801 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9802 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9803 while (true) {
9804 if (s1 == s2) {
9805 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9806 typecode: &container_type_1);
9807 c1 = get_writable_copy_if_shared(candidate_shared_container: c1, type: &container_type_1);
9808 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9809 typecode: &container_type_2);
9810 void *c =
9811 container_lazy_ixor(c1, type1: container_type_1, c2, type2: container_type_2,
9812 result_type: &container_result_type);
9813 if (container_nonzero_cardinality(container: c, typecode: container_result_type)) {
9814 ra_set_container_at_index(ra: &x1->high_low_container, i: pos1, c,
9815 typecode: container_result_type);
9816 ++pos1;
9817 } else {
9818 container_free(container: c, typecode: container_result_type);
9819 ra_remove_at_index(ra: &x1->high_low_container, i: pos1);
9820 --length1;
9821 }
9822 ++pos2;
9823 if (pos1 == length1) break;
9824 if (pos2 == length2) break;
9825 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9826 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9827
9828 } else if (s1 < s2) { // s1 < s2
9829 pos1++;
9830 if (pos1 == length1) break;
9831 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9832
9833 } else { // s1 > s2
9834 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9835 typecode: &container_type_2);
9836 // void *c2_clone = container_clone(c2, container_type_2);
9837 c2 =
9838 get_copy_of_container(container: c2, typecode: &container_type_2, copy_on_write: is_cow(r: x2));
9839 if (is_cow(r: x2)) {
9840 ra_set_container_at_index(ra: &x2->high_low_container, i: pos2, c: c2,
9841 typecode: container_type_2);
9842 }
9843 ra_insert_new_key_value_at(ra: &x1->high_low_container, i: pos1, key: s2, container: c2,
9844 typecode: container_type_2);
9845 pos1++;
9846 length1++;
9847 pos2++;
9848 if (pos2 == length2) break;
9849 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9850 }
9851 }
9852 if (pos1 == length1) {
9853 ra_append_copy_range(ra: &x1->high_low_container, sa: &x2->high_low_container,
9854 start_index: pos2, end_index: length2, copy_on_write: is_cow(r: x2));
9855 }
9856}
9857
9858void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) {
9859 for (int i = 0; i < ra->high_low_container.size; ++i) {
9860 const uint8_t original_typecode = ra->high_low_container.typecodes[i];
9861 void *container = ra->high_low_container.containers[i];
9862 uint8_t new_typecode = original_typecode;
9863 void *newcontainer =
9864 container_repair_after_lazy(container, typecode: &new_typecode);
9865 ra->high_low_container.containers[i] = newcontainer;
9866 ra->high_low_container.typecodes[i] = new_typecode;
9867 }
9868}
9869
9870
9871
9872/**
9873* roaring_bitmap_rank returns the number of integers that are smaller or equal
9874* to x.
9875*/
9876uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
9877 uint64_t size = 0;
9878 uint32_t xhigh = x >> 16;
9879 for (int i = 0; i < bm->high_low_container.size; i++) {
9880 uint32_t key = bm->high_low_container.keys[i];
9881 if (xhigh > key) {
9882 size +=
9883 container_get_cardinality(container: bm->high_low_container.containers[i],
9884 typecode: bm->high_low_container.typecodes[i]);
9885 } else if (xhigh == key) {
9886 return size + container_rank(container: bm->high_low_container.containers[i],
9887 typecode: bm->high_low_container.typecodes[i],
9888 x: x & 0xFFFF);
9889 } else {
9890 return size;
9891 }
9892 }
9893 return size;
9894}
9895
9896/**
9897* roaring_bitmap_smallest returns the smallest value in the set.
9898* Returns UINT32_MAX if the set is empty.
9899*/
9900uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
9901 if (bm->high_low_container.size > 0) {
9902 void *container = bm->high_low_container.containers[0];
9903 uint8_t typecode = bm->high_low_container.typecodes[0];
9904 uint32_t key = bm->high_low_container.keys[0];
9905 uint32_t lowvalue = container_minimum(container, typecode);
9906 return lowvalue | (key << 16);
9907 }
9908 return UINT32_MAX;
9909}
9910
9911/**
9912* roaring_bitmap_smallest returns the greatest value in the set.
9913* Returns 0 if the set is empty.
9914*/
9915uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
9916 if (bm->high_low_container.size > 0) {
9917 void *container =
9918 bm->high_low_container.containers[bm->high_low_container.size - 1];
9919 uint8_t typecode =
9920 bm->high_low_container.typecodes[bm->high_low_container.size - 1];
9921 uint32_t key =
9922 bm->high_low_container.keys[bm->high_low_container.size - 1];
9923 uint32_t lowvalue = container_maximum(container, typecode);
9924 return lowvalue | (key << 16);
9925 }
9926 return 0;
9927}
9928
9929bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
9930 uint32_t *element) {
9931 void *container;
9932 uint8_t typecode;
9933 uint16_t key;
9934 uint32_t start_rank = 0;
9935 int i = 0;
9936 bool valid = false;
9937 while (!valid && i < bm->high_low_container.size) {
9938 container = bm->high_low_container.containers[i];
9939 typecode = bm->high_low_container.typecodes[i];
9940 valid =
9941 container_select(container, typecode, start_rank: &start_rank, rank, element);
9942 i++;
9943 }
9944
9945 if (valid) {
9946 key = bm->high_low_container.keys[i - 1];
9947 *element |= (key << 16);
9948 return true;
9949 } else
9950 return false;
9951}
9952
9953bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
9954 const roaring_bitmap_t *x2) {
9955 const int length1 = x1->high_low_container.size,
9956 length2 = x2->high_low_container.size;
9957 uint64_t answer = 0;
9958 int pos1 = 0, pos2 = 0;
9959
9960 while (pos1 < length1 && pos2 < length2) {
9961 const uint16_t s1 = ra_get_key_at_index(ra: & x1->high_low_container, i: pos1);
9962 const uint16_t s2 = ra_get_key_at_index(ra: & x2->high_low_container, i: pos2);
9963
9964 if (s1 == s2) {
9965 uint8_t container_type_1, container_type_2;
9966 void *c1 = ra_get_container_at_index(ra: & x1->high_low_container, i: pos1,
9967 typecode: &container_type_1);
9968 void *c2 = ra_get_container_at_index(ra: & x2->high_low_container, i: pos2,
9969 typecode: &container_type_2);
9970 if( container_intersect(c1, type1: container_type_1, c2, type2: container_type_2) ) return true;
9971 ++pos1;
9972 ++pos2;
9973 } else if (s1 < s2) { // s1 < s2
9974 pos1 = ra_advance_until(ra: & x1->high_low_container, x: s2, pos: pos1);
9975 } else { // s1 > s2
9976 pos2 = ra_advance_until(ra: & x2->high_low_container, x: s1, pos: pos2);
9977 }
9978 }
9979 return answer;
9980}
9981
9982
9983uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
9984 const roaring_bitmap_t *x2) {
9985 const int length1 = x1->high_low_container.size,
9986 length2 = x2->high_low_container.size;
9987 uint64_t answer = 0;
9988 int pos1 = 0, pos2 = 0;
9989
9990 while (pos1 < length1 && pos2 < length2) {
9991 const uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
9992 const uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
9993
9994 if (s1 == s2) {
9995 uint8_t container_type_1, container_type_2;
9996 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
9997 typecode: &container_type_1);
9998 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
9999 typecode: &container_type_2);
10000 answer += container_and_cardinality(c1, type1: container_type_1, c2,
10001 type2: container_type_2);
10002 ++pos1;
10003 ++pos2;
10004 } else if (s1 < s2) { // s1 < s2
10005 pos1 = ra_advance_until(ra: &x1->high_low_container, x: s2, pos: pos1);
10006 } else { // s1 > s2
10007 pos2 = ra_advance_until(ra: &x2->high_low_container, x: s1, pos: pos2);
10008 }
10009 }
10010 return answer;
10011}
10012
10013double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
10014 const roaring_bitmap_t *x2) {
10015 const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1);
10016 const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2);
10017 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
10018 return (double)inter / (double)(c1 + c2 - inter);
10019}
10020
10021uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
10022 const roaring_bitmap_t *x2) {
10023 const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1);
10024 const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2);
10025 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
10026 return c1 + c2 - inter;
10027}
10028
10029uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
10030 const roaring_bitmap_t *x2) {
10031 const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1);
10032 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
10033 return c1 - inter;
10034}
10035
10036uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
10037 const roaring_bitmap_t *x2) {
10038 const uint64_t c1 = roaring_bitmap_get_cardinality(ra: x1);
10039 const uint64_t c2 = roaring_bitmap_get_cardinality(ra: x2);
10040 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
10041 return c1 + c2 - 2 * inter;
10042}
10043
10044
10045/**
10046 * Check whether a range of values from range_start (included) to range_end (excluded) is present
10047 */
10048bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
10049 if(range_end >= UINT64_C(0x100000000)) {
10050 range_end = UINT64_C(0x100000000);
10051 }
10052 if (range_start >= range_end) return true; // empty range are always contained!
10053 if (range_end - range_start == 1) return roaring_bitmap_contains(r, val: (uint32_t)range_start);
10054 uint16_t hb_rs = (uint16_t)(range_start >> 16);
10055 uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
10056 const int32_t span = hb_re - hb_rs;
10057 const int32_t hlc_sz = ra_get_size(ra: &r->high_low_container);
10058 if (hlc_sz < span + 1) {
10059 return false;
10060 }
10061 int32_t is = ra_get_index(ra: &r->high_low_container, x: hb_rs);
10062 int32_t ie = ra_get_index(ra: &r->high_low_container, x: hb_re);
10063 ie = (ie < 0 ? -ie - 1 : ie);
10064 if ((is < 0) || ((ie - is) != span)) {
10065 return false;
10066 }
10067 const uint32_t lb_rs = range_start & 0xFFFF;
10068 const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
10069 uint8_t typecode;
10070 void *container = ra_get_container_at_index(ra: &r->high_low_container, i: is, typecode: &typecode);
10071 if (hb_rs == hb_re) {
10072 return container_contains_range(container, range_start: lb_rs, range_end: lb_re, typecode);
10073 }
10074 if (!container_contains_range(container, range_start: lb_rs, range_end: 1 << 16, typecode)) {
10075 return false;
10076 }
10077 assert(ie < hlc_sz); // would indicate an algorithmic bug
10078 container = ra_get_container_at_index(ra: &r->high_low_container, i: ie, typecode: &typecode);
10079 if (!container_contains_range(container, range_start: 0, range_end: lb_re, typecode)) {
10080 return false;
10081 }
10082 for (int32_t i = is + 1; i < ie; ++i) {
10083 container = ra_get_container_at_index(ra: &r->high_low_container, i, typecode: &typecode);
10084 if (!container_is_full(container, typecode) ) {
10085 return false;
10086 }
10087 }
10088 return true;
10089}
10090
10091
10092bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1,
10093 const roaring_bitmap_t *ra2) {
10094 return (roaring_bitmap_get_cardinality(ra: ra2) >
10095 roaring_bitmap_get_cardinality(ra: ra1) &&
10096 roaring_bitmap_is_subset(ra1, ra2));
10097}
10098
10099
10100/*
10101 * FROZEN SERIALIZATION FORMAT DESCRIPTION
10102 *
10103 * -- (beginning must be aligned by 32 bytes) --
10104 * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
10105 * <run_data> rle16_t[total number of rle elements in all run containers]
10106 * <array_data> uint16_t[total number of array elements in all array containers]
10107 * <keys> uint16_t[num_containers]
10108 * <counts> uint16_t[num_containers]
10109 * <typecodes> uint8_t[num_containers]
10110 * <header> uint32_t
10111 *
10112 * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
10113 * and the number of containers (17 bits).
10114 *
10115 * <counts> stores number of elements for every container.
10116 * Its meaning depends on container type.
10117 * For array and bitset containers, this value is the container cardinality minus one.
10118 * For run container, it is the number of rle_t elements (n_runs).
10119 *
10120 * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
10121 * all containers of respective type.
10122 *
 * <*_data> and <keys> are kept close together because they are not accessed
 * during deserialization. This may reduce IO in case of large mapped bitmaps.
 * All members have their native alignments during deserialization except <header>,
 * which is not guaranteed to be aligned by 4 bytes.
10127 */
10128
10129size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
10130 const roaring_array_t *ra = &rb->high_low_container;
10131 size_t num_bytes = 0;
10132 for (int32_t i = 0; i < ra->size; i++) {
10133 switch (ra->typecodes[i]) {
10134 case BITSET_CONTAINER_TYPE_CODE: {
10135 num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
10136 break;
10137 }
10138 case RUN_CONTAINER_TYPE_CODE: {
10139 const run_container_t *run =
10140 (const run_container_t *) ra->containers[i];
10141 num_bytes += run->n_runs * sizeof(rle16_t);
10142 break;
10143 }
10144 case ARRAY_CONTAINER_TYPE_CODE: {
10145 const array_container_t *array =
10146 (const array_container_t *) ra->containers[i];
10147 num_bytes += array->cardinality * sizeof(uint16_t);
10148 break;
10149 }
10150 default:
10151 __builtin_unreachable();
10152 }
10153 }
10154 num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
10155 num_bytes += 4; // header
10156 return num_bytes;
10157}
10158
/**
 * Bump allocator step: return the current arena position and advance
 * `*arena` by num_bytes. No bounds or alignment checks are performed.
 */
inline static void *arena_alloc(char **arena, size_t num_bytes) {
    char *const start = *arena;
    *arena = start + num_bytes;
    return start;
}
10164
10165void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
10166 /*
10167 * Note: we do not require user to supply spicificly aligned buffer.
10168 * Thus we have to use memcpy() everywhere.
10169 */
10170
10171 const roaring_array_t *ra = &rb->high_low_container;
10172
10173 size_t bitset_zone_size = 0;
10174 size_t run_zone_size = 0;
10175 size_t array_zone_size = 0;
10176 for (int32_t i = 0; i < ra->size; i++) {
10177 switch (ra->typecodes[i]) {
10178 case BITSET_CONTAINER_TYPE_CODE: {
10179 bitset_zone_size +=
10180 BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
10181 break;
10182 }
10183 case RUN_CONTAINER_TYPE_CODE: {
10184 const run_container_t *run =
10185 (const run_container_t *) ra->containers[i];
10186 run_zone_size += run->n_runs * sizeof(rle16_t);
10187 break;
10188 }
10189 case ARRAY_CONTAINER_TYPE_CODE: {
10190 const array_container_t *array =
10191 (const array_container_t *) ra->containers[i];
10192 array_zone_size += array->cardinality * sizeof(uint16_t);
10193 break;
10194 }
10195 default:
10196 __builtin_unreachable();
10197 }
10198 }
10199
10200 uint64_t *bitset_zone = (uint64_t *)arena_alloc(arena: &buf, num_bytes: bitset_zone_size);
10201 rle16_t *run_zone = (rle16_t *)arena_alloc(arena: &buf, num_bytes: run_zone_size);
10202 uint16_t *array_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: array_zone_size);
10203 uint16_t *key_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: 2*ra->size);
10204 uint16_t *count_zone = (uint16_t *)arena_alloc(arena: &buf, num_bytes: 2*ra->size);
10205 uint8_t *typecode_zone = (uint8_t *)arena_alloc(arena: &buf, num_bytes: ra->size);
10206 uint32_t *header_zone = (uint32_t *)arena_alloc(arena: &buf, num_bytes: 4);
10207
10208 for (int32_t i = 0; i < ra->size; i++) {
10209 uint16_t count;
10210 switch (ra->typecodes[i]) {
10211 case BITSET_CONTAINER_TYPE_CODE: {
10212 const bitset_container_t *bitset =
10213 (const bitset_container_t *) ra->containers[i];
10214 memcpy(dest: bitset_zone, src: bitset->array,
10215 n: BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
10216 bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
10217 if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) {
10218 count = bitset->cardinality - 1;
10219 } else {
10220 count = bitset_container_compute_cardinality(bitset) - 1;
10221 }
10222 break;
10223 }
10224 case RUN_CONTAINER_TYPE_CODE: {
10225 const run_container_t *run =
10226 (const run_container_t *) ra->containers[i];
10227 size_t num_bytes = run->n_runs * sizeof(rle16_t);
10228 memcpy(dest: run_zone, src: run->runs, n: num_bytes);
10229 run_zone += run->n_runs;
10230 count = run->n_runs;
10231 break;
10232 }
10233 case ARRAY_CONTAINER_TYPE_CODE: {
10234 const array_container_t *array =
10235 (const array_container_t *) ra->containers[i];
10236 size_t num_bytes = array->cardinality * sizeof(uint16_t);
10237 memcpy(dest: array_zone, src: array->array, n: num_bytes);
10238 array_zone += array->cardinality;
10239 count = array->cardinality - 1;
10240 break;
10241 }
10242 default:
10243 __builtin_unreachable();
10244 }
10245 memcpy(dest: &count_zone[i], src: &count, n: 2);
10246 }
10247 memcpy(dest: key_zone, src: ra->keys, n: ra->size * sizeof(uint16_t));
10248 memcpy(dest: typecode_zone, src: ra->typecodes, n: ra->size * sizeof(uint8_t));
10249 uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
10250 memcpy(dest: header_zone, src: &header, n: 4);
10251}
10252
10253const roaring_bitmap_t *
10254roaring_bitmap_frozen_view(const char *buf, size_t length) {
10255 if ((uintptr_t)buf % 32 != 0) {
10256 return NULL;
10257 }
10258
10259 // cookie and num_containers
10260 if (length < 4) {
10261 return NULL;
10262 }
10263 uint32_t header;
10264 memcpy(dest: &header, src: buf + length - 4, n: 4); // header may be misaligned
10265 if ((header & 0x7FFF) != FROZEN_COOKIE) {
10266 return NULL;
10267 }
10268 int32_t num_containers = (header >> 15);
10269
10270 // typecodes, counts and keys
10271 if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
10272 return NULL;
10273 }
10274 uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
10275 uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
10276 uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
10277
10278 // {bitset,array,run}_zone
10279 int32_t num_bitset_containers = 0;
10280 int32_t num_run_containers = 0;
10281 int32_t num_array_containers = 0;
10282 size_t bitset_zone_size = 0;
10283 size_t run_zone_size = 0;
10284 size_t array_zone_size = 0;
10285 for (int32_t i = 0; i < num_containers; i++) {
10286 switch (typecodes[i]) {
10287 case BITSET_CONTAINER_TYPE_CODE:
10288 num_bitset_containers++;
10289 bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
10290 break;
10291 case RUN_CONTAINER_TYPE_CODE:
10292 num_run_containers++;
10293 run_zone_size += counts[i] * sizeof(rle16_t);
10294 break;
10295 case ARRAY_CONTAINER_TYPE_CODE:
10296 num_array_containers++;
10297 array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
10298 break;
10299 default:
10300 return NULL;
10301 }
10302 }
10303 if (length != bitset_zone_size + run_zone_size + array_zone_size +
10304 5 * num_containers + 4) {
10305 return NULL;
10306 }
10307 uint64_t *bitset_zone = (uint64_t*) (buf);
10308 rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
10309 uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
10310
10311 size_t alloc_size = 0;
10312 alloc_size += sizeof(roaring_bitmap_t);
10313 alloc_size += num_containers * sizeof(void *);
10314 alloc_size += num_bitset_containers * sizeof(bitset_container_t);
10315 alloc_size += num_run_containers * sizeof(run_container_t);
10316 alloc_size += num_array_containers * sizeof(array_container_t);
10317
10318 char *arena = (char *)malloc(size: alloc_size);
10319 if (arena == NULL) {
10320 return NULL;
10321 }
10322
10323 roaring_bitmap_t *rb = (roaring_bitmap_t *)
10324 arena_alloc(arena: &arena, num_bytes: sizeof(roaring_bitmap_t));
10325 rb->high_low_container.flags = ROARING_FLAG_FROZEN;
10326 rb->high_low_container.allocation_size = num_containers;
10327 rb->high_low_container.size = num_containers;
10328 rb->high_low_container.keys = (uint16_t *)keys;
10329 rb->high_low_container.typecodes = (uint8_t *)typecodes;
10330 rb->high_low_container.containers =
10331 (void **)arena_alloc(arena: &arena, num_bytes: sizeof(void*) * num_containers);
10332 for (int32_t i = 0; i < num_containers; i++) {
10333 switch (typecodes[i]) {
10334 case BITSET_CONTAINER_TYPE_CODE: {
10335 bitset_container_t *bitset = (bitset_container_t *)
10336 arena_alloc(arena: &arena, num_bytes: sizeof(bitset_container_t));
10337 bitset->array = bitset_zone;
10338 bitset->cardinality = counts[i] + UINT32_C(1);
10339 rb->high_low_container.containers[i] = bitset;
10340 bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
10341 break;
10342 }
10343 case RUN_CONTAINER_TYPE_CODE: {
10344 run_container_t *run = (run_container_t *)
10345 arena_alloc(arena: &arena, num_bytes: sizeof(run_container_t));
10346 run->capacity = counts[i];
10347 run->n_runs = counts[i];
10348 run->runs = run_zone;
10349 rb->high_low_container.containers[i] = run;
10350 run_zone += run->n_runs;
10351 break;
10352 }
10353 case ARRAY_CONTAINER_TYPE_CODE: {
10354 array_container_t *array = (array_container_t *)
10355 arena_alloc(arena: &arena, num_bytes: sizeof(array_container_t));
10356 array->capacity = counts[i] + UINT32_C(1);
10357 array->cardinality = counts[i] + UINT32_C(1);
10358 array->array = array_zone;
10359 rb->high_low_container.containers[i] = array;
10360 array_zone += counts[i] + UINT32_C(1);
10361 break;
10362 }
10363 default:
10364 free(ptr: arena);
10365 return NULL;
10366 }
10367 }
10368
10369 return rb;
10370}
10371/* end file src/roaring.c */
10372/* begin file src/roaring_array.c */
10373#include <assert.h>
10374#include <stdbool.h>
10375#include <stdio.h>
10376#include <stdlib.h>
10377#include <string.h>
10378#include <inttypes.h>
10379
10380
10381// Convention: [0,ra->size) all elements are initialized
10382// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
10383
10384static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
10385 // because we combine the allocations, it is not possible to use realloc
10386 /*ra->keys =
10387 (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity);
10388ra->containers =
10389 (void **)realloc(ra->containers, sizeof(void *) * new_capacity);
10390ra->typecodes =
10391 (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity);
10392if (!ra->keys || !ra->containers || !ra->typecodes) {
10393 free(ra->keys);
10394 free(ra->containers);
10395 free(ra->typecodes);
10396 return false;
10397}*/
10398
10399 if ( new_capacity == 0 ) {
10400 free(ptr: ra->containers);
10401 ra->containers = NULL;
10402 ra->keys = NULL;
10403 ra->typecodes = NULL;
10404 ra->allocation_size = 0;
10405 return true;
10406 }
10407 const size_t memoryneeded =
10408 new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
10409 void *bigalloc = malloc(size: memoryneeded);
10410 if (!bigalloc) return false;
10411 void *oldbigalloc = ra->containers;
10412 void **newcontainers = (void **)bigalloc;
10413 uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
10414 uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
10415 assert((char *)(newtypecodes + new_capacity) ==
10416 (char *)bigalloc + memoryneeded);
10417 if(ra->size > 0) {
10418 memcpy(dest: newcontainers, src: ra->containers, n: sizeof(void *) * ra->size);
10419 memcpy(dest: newkeys, src: ra->keys, n: sizeof(uint16_t) * ra->size);
10420 memcpy(dest: newtypecodes, src: ra->typecodes, n: sizeof(uint8_t) * ra->size);
10421 }
10422 ra->containers = newcontainers;
10423 ra->keys = newkeys;
10424 ra->typecodes = newtypecodes;
10425 ra->allocation_size = new_capacity;
10426 free(ptr: oldbigalloc);
10427 return true;
10428}
10429
10430bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
10431 if (!new_ra) return false;
10432 ra_init(t: new_ra);
10433
10434 if (cap > INT32_MAX) { return false; }
10435
10436 if(cap > 0) {
10437 void *bigalloc =
10438 malloc(size: cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)));
10439 if( bigalloc == NULL ) return false;
10440 new_ra->containers = (void **)bigalloc;
10441 new_ra->keys = (uint16_t *)(new_ra->containers + cap);
10442 new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
10443 // Narrowing is safe because of above check
10444 new_ra->allocation_size = (int32_t)cap;
10445 }
10446 return true;
10447}
10448
10449int ra_shrink_to_fit(roaring_array_t *ra) {
10450 int savings = (ra->allocation_size - ra->size) *
10451 (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
10452 if (!realloc_array(ra, new_capacity: ra->size)) {
10453 return 0;
10454 }
10455 ra->allocation_size = ra->size;
10456 return savings;
10457}
10458
10459void ra_init(roaring_array_t *new_ra) {
10460 if (!new_ra) { return; }
10461 new_ra->keys = NULL;
10462 new_ra->containers = NULL;
10463 new_ra->typecodes = NULL;
10464
10465 new_ra->allocation_size = 0;
10466 new_ra->size = 0;
10467 new_ra->flags = 0;
10468}
10469
10470bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
10471 bool copy_on_write) {
10472 if (!ra_init_with_capacity(new_ra: dest, cap: source->size)) return false;
10473 dest->size = source->size;
10474 dest->allocation_size = source->size;
10475 if(dest->size > 0) {
10476 memcpy(dest: dest->keys, src: source->keys, n: dest->size * sizeof(uint16_t));
10477 }
10478 // we go through the containers, turning them into shared containers...
10479 if (copy_on_write) {
10480 for (int32_t i = 0; i < dest->size; ++i) {
10481 source->containers[i] = get_copy_of_container(
10482 container: source->containers[i], typecode: &source->typecodes[i], copy_on_write);
10483 }
10484 // we do a shallow copy to the other bitmap
10485 if(dest->size > 0) {
10486 memcpy(dest: dest->containers, src: source->containers,
10487 n: dest->size * sizeof(void *));
10488 memcpy(dest: dest->typecodes, src: source->typecodes,
10489 n: dest->size * sizeof(uint8_t));
10490 }
10491 } else {
10492 if(dest->size > 0) {
10493 memcpy(dest: dest->typecodes, src: source->typecodes,
10494 n: dest->size * sizeof(uint8_t));
10495 }
10496 for (int32_t i = 0; i < dest->size; i++) {
10497 dest->containers[i] =
10498 container_clone(container: source->containers[i], typecode: source->typecodes[i]);
10499 if (dest->containers[i] == NULL) {
10500 for (int32_t j = 0; j < i; j++) {
10501 container_free(container: dest->containers[j], typecode: dest->typecodes[j]);
10502 }
10503 ra_clear_without_containers(r: dest);
10504 return false;
10505 }
10506 }
10507 }
10508 return true;
10509}
10510
10511bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
10512 bool copy_on_write) {
10513 ra_clear_containers(ra: dest); // we are going to overwrite them
10514 if (dest->allocation_size < source->size) {
10515 if (!realloc_array(ra: dest, new_capacity: source->size)) {
10516 return false;
10517 }
10518 }
10519 dest->size = source->size;
10520 memcpy(dest: dest->keys, src: source->keys, n: dest->size * sizeof(uint16_t));
10521 // we go through the containers, turning them into shared containers...
10522 if (copy_on_write) {
10523 for (int32_t i = 0; i < dest->size; ++i) {
10524 source->containers[i] = get_copy_of_container(
10525 container: source->containers[i], typecode: &source->typecodes[i], copy_on_write);
10526 }
10527 // we do a shallow copy to the other bitmap
10528 memcpy(dest: dest->containers, src: source->containers,
10529 n: dest->size * sizeof(void *));
10530 memcpy(dest: dest->typecodes, src: source->typecodes,
10531 n: dest->size * sizeof(uint8_t));
10532 } else {
10533 memcpy(dest: dest->typecodes, src: source->typecodes,
10534 n: dest->size * sizeof(uint8_t));
10535 for (int32_t i = 0; i < dest->size; i++) {
10536 dest->containers[i] =
10537 container_clone(container: source->containers[i], typecode: source->typecodes[i]);
10538 if (dest->containers[i] == NULL) {
10539 for (int32_t j = 0; j < i; j++) {
10540 container_free(container: dest->containers[j], typecode: dest->typecodes[j]);
10541 }
10542 ra_clear_without_containers(r: dest);
10543 return false;
10544 }
10545 }
10546 }
10547 return true;
10548}
10549
10550void ra_clear_containers(roaring_array_t *ra) {
10551 for (int32_t i = 0; i < ra->size; ++i) {
10552 container_free(container: ra->containers[i], typecode: ra->typecodes[i]);
10553 }
10554}
10555
10556void ra_reset(roaring_array_t *ra) {
10557 ra_clear_containers(ra);
10558 ra->size = 0;
10559 ra_shrink_to_fit(ra);
10560}
10561
10562void ra_clear_without_containers(roaring_array_t *ra) {
10563 free(ptr: ra->containers); // keys and typecodes are allocated with containers
10564 ra->size = 0;
10565 ra->allocation_size = 0;
10566 ra->containers = NULL;
10567 ra->keys = NULL;
10568 ra->typecodes = NULL;
10569}
10570
10571void ra_clear(roaring_array_t *ra) {
10572 ra_clear_containers(ra);
10573 ra_clear_without_containers(ra);
10574}
10575
10576bool extend_array(roaring_array_t *ra, int32_t k) {
10577 int32_t desired_size = ra->size + k;
10578 assert(desired_size <= MAX_CONTAINERS);
10579 if (desired_size > ra->allocation_size) {
10580 int32_t new_capacity =
10581 (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
10582 if (new_capacity > MAX_CONTAINERS) {
10583 new_capacity = MAX_CONTAINERS;
10584 }
10585
10586 return realloc_array(ra, new_capacity);
10587 }
10588 return true;
10589}
10590
10591void ra_append(roaring_array_t *ra, uint16_t key, void *container,
10592 uint8_t typecode) {
10593 extend_array(ra, k: 1);
10594 const int32_t pos = ra->size;
10595
10596 ra->keys[pos] = key;
10597 ra->containers[pos] = container;
10598 ra->typecodes[pos] = typecode;
10599 ra->size++;
10600}
10601
10602void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
10603 uint16_t index, bool copy_on_write) {
10604 extend_array(ra, k: 1);
10605 const int32_t pos = ra->size;
10606
10607 // old contents is junk not needing freeing
10608 ra->keys[pos] = sa->keys[index];
10609 // the shared container will be in two bitmaps
10610 if (copy_on_write) {
10611 sa->containers[index] = get_copy_of_container(
10612 container: sa->containers[index], typecode: &sa->typecodes[index], copy_on_write);
10613 ra->containers[pos] = sa->containers[index];
10614 ra->typecodes[pos] = sa->typecodes[index];
10615 } else {
10616 ra->containers[pos] =
10617 container_clone(container: sa->containers[index], typecode: sa->typecodes[index]);
10618 ra->typecodes[pos] = sa->typecodes[index];
10619 }
10620 ra->size++;
10621}
10622
10623void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
10624 uint16_t stopping_key, bool copy_on_write) {
10625 for (int32_t i = 0; i < sa->size; ++i) {
10626 if (sa->keys[i] >= stopping_key) break;
10627 ra_append_copy(ra, sa, index: i, copy_on_write);
10628 }
10629}
10630
10631void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
10632 int32_t start_index, int32_t end_index,
10633 bool copy_on_write) {
10634 extend_array(ra, k: end_index - start_index);
10635 for (int32_t i = start_index; i < end_index; ++i) {
10636 const int32_t pos = ra->size;
10637 ra->keys[pos] = sa->keys[i];
10638 if (copy_on_write) {
10639 sa->containers[i] = get_copy_of_container(
10640 container: sa->containers[i], typecode: &sa->typecodes[i], copy_on_write);
10641 ra->containers[pos] = sa->containers[i];
10642 ra->typecodes[pos] = sa->typecodes[i];
10643 } else {
10644 ra->containers[pos] =
10645 container_clone(container: sa->containers[i], typecode: sa->typecodes[i]);
10646 ra->typecodes[pos] = sa->typecodes[i];
10647 }
10648 ra->size++;
10649 }
10650}
10651
10652void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
10653 uint16_t before_start, bool copy_on_write) {
10654 int start_location = ra_get_index(ra: sa, x: before_start);
10655 if (start_location >= 0)
10656 ++start_location;
10657 else
10658 start_location = -start_location - 1;
10659 ra_append_copy_range(ra, sa, start_index: start_location, end_index: sa->size, copy_on_write);
10660}
10661
10662void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
10663 int32_t start_index, int32_t end_index) {
10664 extend_array(ra, k: end_index - start_index);
10665
10666 for (int32_t i = start_index; i < end_index; ++i) {
10667 const int32_t pos = ra->size;
10668
10669 ra->keys[pos] = sa->keys[i];
10670 ra->containers[pos] = sa->containers[i];
10671 ra->typecodes[pos] = sa->typecodes[i];
10672 ra->size++;
10673 }
10674}
10675
10676void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
10677 int32_t start_index, int32_t end_index,
10678 bool copy_on_write) {
10679 extend_array(ra, k: end_index - start_index);
10680
10681 for (int32_t i = start_index; i < end_index; ++i) {
10682 const int32_t pos = ra->size;
10683 ra->keys[pos] = sa->keys[i];
10684 if (copy_on_write) {
10685 sa->containers[i] = get_copy_of_container(
10686 container: sa->containers[i], typecode: &sa->typecodes[i], copy_on_write);
10687 ra->containers[pos] = sa->containers[i];
10688 ra->typecodes[pos] = sa->typecodes[i];
10689 } else {
10690 ra->containers[pos] =
10691 container_clone(container: sa->containers[i], typecode: sa->typecodes[i]);
10692 ra->typecodes[pos] = sa->typecodes[i];
10693 }
10694 ra->size++;
10695 }
10696}
10697
10698uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
10699 return ra->keys[i];
10700}
10701
10702// everything skipped over is freed
10703int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
10704 while (pos < ra->size && ra->keys[pos] < x) {
10705 container_free(container: ra->containers[pos], typecode: ra->typecodes[pos]);
10706 ++pos;
10707 }
10708 return pos;
10709}
10710
10711void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
10712 void *container, uint8_t typecode) {
10713 extend_array(ra, k: 1);
10714 // May be an optimization opportunity with DIY memmove
10715 memmove(dest: &(ra->keys[i + 1]), src: &(ra->keys[i]),
10716 n: sizeof(uint16_t) * (ra->size - i));
10717 memmove(dest: &(ra->containers[i + 1]), src: &(ra->containers[i]),
10718 n: sizeof(void *) * (ra->size - i));
10719 memmove(dest: &(ra->typecodes[i + 1]), src: &(ra->typecodes[i]),
10720 n: sizeof(uint8_t) * (ra->size - i));
10721 ra->keys[i] = key;
10722 ra->containers[i] = container;
10723 ra->typecodes[i] = typecode;
10724 ra->size++;
10725}
10726
10727// note: Java routine set things to 0, enabling GC.
10728// Java called it "resize" but it was always used to downsize.
10729// Allowing upsize would break the conventions about
10730// valid containers below ra->size.
10731
10732void ra_downsize(roaring_array_t *ra, int32_t new_length) {
10733 assert(new_length <= ra->size);
10734 ra->size = new_length;
10735}
10736
10737void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
10738 memmove(dest: &(ra->containers[i]), src: &(ra->containers[i + 1]),
10739 n: sizeof(void *) * (ra->size - i - 1));
10740 memmove(dest: &(ra->keys[i]), src: &(ra->keys[i + 1]),
10741 n: sizeof(uint16_t) * (ra->size - i - 1));
10742 memmove(dest: &(ra->typecodes[i]), src: &(ra->typecodes[i + 1]),
10743 n: sizeof(uint8_t) * (ra->size - i - 1));
10744 ra->size--;
10745}
10746
10747void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
10748 container_free(container: ra->containers[i], typecode: ra->typecodes[i]);
10749 ra_remove_at_index(ra, i);
10750}
10751
10752// used in inplace andNot only, to slide left the containers from
10753// the mutated RoaringBitmap that are after the largest container of
10754// the argument RoaringBitmap. In use it should be followed by a call to
10755// downsize.
10756//
10757void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
10758 uint32_t new_begin) {
10759 assert(begin <= end);
10760 assert(new_begin < begin);
10761
10762 const int range = end - begin;
10763
10764 // We ensure to previously have freed overwritten containers
10765 // that are not copied elsewhere
10766
10767 memmove(dest: &(ra->containers[new_begin]), src: &(ra->containers[begin]),
10768 n: sizeof(void *) * range);
10769 memmove(dest: &(ra->keys[new_begin]), src: &(ra->keys[begin]),
10770 n: sizeof(uint16_t) * range);
10771 memmove(dest: &(ra->typecodes[new_begin]), src: &(ra->typecodes[begin]),
10772 n: sizeof(uint8_t) * range);
10773}
10774
10775void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
10776 if (distance > 0) {
10777 extend_array(ra, k: distance);
10778 }
10779 int32_t srcpos = ra->size - count;
10780 int32_t dstpos = srcpos + distance;
10781 memmove(dest: &(ra->keys[dstpos]), src: &(ra->keys[srcpos]),
10782 n: sizeof(uint16_t) * count);
10783 memmove(dest: &(ra->containers[dstpos]), src: &(ra->containers[srcpos]),
10784 n: sizeof(void *) * count);
10785 memmove(dest: &(ra->typecodes[dstpos]), src: &(ra->typecodes[srcpos]),
10786 n: sizeof(uint8_t) * count);
10787 ra->size += distance;
10788}
10789
10790
10791void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
10792 size_t ctr = 0;
10793 for (int32_t i = 0; i < ra->size; ++i) {
10794 int num_added = container_to_uint32_array(
10795 output: ans + ctr, container: ra->containers[i], typecode: ra->typecodes[i],
10796 base: ((uint32_t)ra->keys[i]) << 16);
10797 ctr += num_added;
10798 }
10799}
10800
10801bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
10802 size_t ctr = 0;
10803 size_t dtr = 0;
10804
10805 size_t t_limit = 0;
10806
10807 bool first = false;
10808 size_t first_skip = 0;
10809
10810 uint32_t *t_ans = NULL;
10811 size_t cur_len = 0;
10812
10813 for (int i = 0; i < ra->size; ++i) {
10814
10815 const void *container = container_unwrap_shared(candidate_shared_container: ra->containers[i], type: &ra->typecodes[i]);
10816 switch (ra->typecodes[i]) {
10817 case BITSET_CONTAINER_TYPE_CODE:
10818 t_limit = ((const bitset_container_t *)container)->cardinality;
10819 break;
10820 case ARRAY_CONTAINER_TYPE_CODE:
10821 t_limit = ((const array_container_t *)container)->cardinality;
10822 break;
10823 case RUN_CONTAINER_TYPE_CODE:
10824 t_limit = run_container_cardinality(run: (const run_container_t *)container);
10825 break;
10826 case SHARED_CONTAINER_TYPE_CODE:
10827 default:
10828 __builtin_unreachable();
10829 }
10830 if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
10831 if (!first){
10832 //first_skip = t_limit - (ctr + t_limit - offset);
10833 first_skip = offset - ctr;
10834 first = true;
10835 t_ans = (uint32_t *)malloc(size: sizeof(*t_ans) * (first_skip + limit));
10836 if(t_ans == NULL) {
10837 return false;
10838 }
10839 memset(s: t_ans, c: 0, n: sizeof(*t_ans) * (first_skip + limit)) ;
10840 cur_len = first_skip + limit;
10841 }
10842 if (dtr + t_limit > cur_len){
10843 uint32_t * append_ans = (uint32_t *)malloc(size: sizeof(*append_ans) * (cur_len + t_limit));
10844 if(append_ans == NULL) {
10845 if(t_ans != NULL) free(ptr: t_ans);
10846 return false;
10847 }
10848 memset(s: append_ans, c: 0, n: sizeof(*append_ans) * (cur_len + t_limit));
10849 cur_len = cur_len + t_limit;
10850 memcpy(dest: append_ans, src: t_ans, n: dtr * sizeof(uint32_t));
10851 free(ptr: t_ans);
10852 t_ans = append_ans;
10853 }
10854 switch (ra->typecodes[i]) {
10855 case BITSET_CONTAINER_TYPE_CODE:
10856 container_to_uint32_array(
10857 output: t_ans + dtr, container: (const bitset_container_t *)container, typecode: ra->typecodes[i],
10858 base: ((uint32_t)ra->keys[i]) << 16);
10859 break;
10860 case ARRAY_CONTAINER_TYPE_CODE:
10861 container_to_uint32_array(
10862 output: t_ans + dtr, container: (const array_container_t *)container, typecode: ra->typecodes[i],
10863 base: ((uint32_t)ra->keys[i]) << 16);
10864 break;
10865 case RUN_CONTAINER_TYPE_CODE:
10866 container_to_uint32_array(
10867 output: t_ans + dtr, container: (const run_container_t *)container, typecode: ra->typecodes[i],
10868 base: ((uint32_t)ra->keys[i]) << 16);
10869 break;
10870 case SHARED_CONTAINER_TYPE_CODE:
10871 default:
10872 __builtin_unreachable();
10873 }
10874 dtr += t_limit;
10875 }
10876 ctr += t_limit;
10877 if (dtr-first_skip >= limit) break;
10878 }
10879 if(t_ans != NULL) {
10880 memcpy(dest: ans, src: t_ans+first_skip, n: limit * sizeof(uint32_t));
10881 free(ptr: t_ans);
10882 }
10883 return true;
10884}
10885
10886bool ra_has_run_container(const roaring_array_t *ra) {
10887 for (int32_t k = 0; k < ra->size; ++k) {
10888 if (get_container_type(container: ra->containers[k], type: ra->typecodes[k]) ==
10889 RUN_CONTAINER_TYPE_CODE)
10890 return true;
10891 }
10892 return false;
10893}
10894
10895uint32_t ra_portable_header_size(const roaring_array_t *ra) {
10896 if (ra_has_run_container(ra)) {
10897 if (ra->size <
10898 NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets
10899 return 4 + (ra->size + 7) / 8 + 4 * ra->size;
10900 }
10901 return 4 + (ra->size + 7) / 8 +
10902 8 * ra->size; // - 4 because we pack the size with the cookie
10903 } else {
10904 return 4 + 4 + 8 * ra->size;
10905 }
10906}
10907
10908size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
10909 size_t count = ra_portable_header_size(ra);
10910
10911 for (int32_t k = 0; k < ra->size; ++k) {
10912 count += container_size_in_bytes(container: ra->containers[k], typecode: ra->typecodes[k]);
10913 }
10914 return count;
10915}
10916
10917size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
10918 char *initbuf = buf;
10919 uint32_t startOffset = 0;
10920 bool hasrun = ra_has_run_container(ra);
10921 if (hasrun) {
10922 uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
10923 memcpy(dest: buf, src: &cookie, n: sizeof(cookie));
10924 buf += sizeof(cookie);
10925 uint32_t s = (ra->size + 7) / 8;
10926 uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(nmemb: s, size: 1);
10927 assert(bitmapOfRunContainers != NULL); // todo: handle
10928 for (int32_t i = 0; i < ra->size; ++i) {
10929 if (get_container_type(container: ra->containers[i], type: ra->typecodes[i]) ==
10930 RUN_CONTAINER_TYPE_CODE) {
10931 bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
10932 }
10933 }
10934 memcpy(dest: buf, src: bitmapOfRunContainers, n: s);
10935 buf += s;
10936 free(ptr: bitmapOfRunContainers);
10937 if (ra->size < NO_OFFSET_THRESHOLD) {
10938 startOffset = 4 + 4 * ra->size + s;
10939 } else {
10940 startOffset = 4 + 8 * ra->size + s;
10941 }
10942 } else { // backwards compatibility
10943 uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
10944
10945 memcpy(dest: buf, src: &cookie, n: sizeof(cookie));
10946 buf += sizeof(cookie);
10947 memcpy(dest: buf, src: &ra->size, n: sizeof(ra->size));
10948 buf += sizeof(ra->size);
10949
10950 startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
10951 }
10952 for (int32_t k = 0; k < ra->size; ++k) {
10953 memcpy(dest: buf, src: &ra->keys[k], n: sizeof(ra->keys[k]));
10954 buf += sizeof(ra->keys[k]);
10955 // get_cardinality returns a value in [1,1<<16], subtracting one
10956 // we get [0,1<<16 - 1] which fits in 16 bits
10957 uint16_t card = (uint16_t)(
10958 container_get_cardinality(container: ra->containers[k], typecode: ra->typecodes[k]) - 1);
10959 memcpy(dest: buf, src: &card, n: sizeof(card));
10960 buf += sizeof(card);
10961 }
10962 if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
10963 // writing the containers offsets
10964 for (int32_t k = 0; k < ra->size; k++) {
10965 memcpy(dest: buf, src: &startOffset, n: sizeof(startOffset));
10966 buf += sizeof(startOffset);
10967 startOffset =
10968 startOffset +
10969 container_size_in_bytes(container: ra->containers[k], typecode: ra->typecodes[k]);
10970 }
10971 }
10972 for (int32_t k = 0; k < ra->size; ++k) {
10973 buf += container_write(container: ra->containers[k], typecode: ra->typecodes[k], buf);
10974 }
10975 return buf - initbuf;
10976}
10977
10978// Quickly checks whether there is a serialized bitmap at the pointer,
10979// not exceeding size "maxbytes" in bytes. This function does not allocate
10980// memory dynamically.
10981//
10982// This function returns 0 if and only if no valid bitmap is found.
10983// Otherwise, it returns how many bytes are occupied.
10984//
10985size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
10986 size_t bytestotal = sizeof(int32_t);// for cookie
10987 if(bytestotal > maxbytes) return 0;
10988 uint32_t cookie;
10989 memcpy(dest: &cookie, src: buf, n: sizeof(int32_t));
10990 buf += sizeof(uint32_t);
10991 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
10992 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
10993 return 0;
10994 }
10995 int32_t size;
10996
10997 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
10998 size = (cookie >> 16) + 1;
10999 else {
11000 bytestotal += sizeof(int32_t);
11001 if(bytestotal > maxbytes) return 0;
11002 memcpy(dest: &size, src: buf, n: sizeof(int32_t));
11003 buf += sizeof(uint32_t);
11004 }
11005 if (size > (1<<16)) {
11006 return 0; // logically impossible
11007 }
11008 char *bitmapOfRunContainers = NULL;
11009 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
11010 if (hasrun) {
11011 int32_t s = (size + 7) / 8;
11012 bytestotal += s;
11013 if(bytestotal > maxbytes) return 0;
11014 bitmapOfRunContainers = (char *)buf;
11015 buf += s;
11016 }
11017 bytestotal += size * 2 * sizeof(uint16_t);
11018 if(bytestotal > maxbytes) return 0;
11019 uint16_t *keyscards = (uint16_t *)buf;
11020 buf += size * 2 * sizeof(uint16_t);
11021 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
11022 // skipping the offsets
11023 bytestotal += size * 4;
11024 if(bytestotal > maxbytes) return 0;
11025 buf += size * 4;
11026 }
11027 // Reading the containers
11028 for (int32_t k = 0; k < size; ++k) {
11029 uint16_t tmp;
11030 memcpy(dest: &tmp, src: keyscards + 2*k+1, n: sizeof(tmp));
11031 uint32_t thiscard = tmp + 1;
11032 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
11033 bool isrun = false;
11034 if(hasrun) {
11035 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
11036 isbitmap = false;
11037 isrun = true;
11038 }
11039 }
11040 if (isbitmap) {
11041 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
11042 bytestotal += containersize;
11043 if(bytestotal > maxbytes) return 0;
11044 buf += containersize;
11045 } else if (isrun) {
11046 bytestotal += sizeof(uint16_t);
11047 if(bytestotal > maxbytes) return 0;
11048 uint16_t n_runs;
11049 memcpy(dest: &n_runs, src: buf, n: sizeof(uint16_t));
11050 buf += sizeof(uint16_t);
11051 size_t containersize = n_runs * sizeof(rle16_t);
11052 bytestotal += containersize;
11053 if(bytestotal > maxbytes) return 0;
11054 buf += containersize;
11055 } else {
11056 size_t containersize = thiscard * sizeof(uint16_t);
11057 bytestotal += containersize;
11058 if(bytestotal > maxbytes) return 0;
11059 buf += containersize;
11060 }
11061 }
11062 return bytestotal;
11063}
11064
11065
11066// this function populates answer from the content of buf (reading up to maxbytes bytes).
11067// The function returns false if a properly serialized bitmap cannot be found.
11068// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
11069bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
11070 *readbytes = sizeof(int32_t);// for cookie
11071 if(*readbytes > maxbytes) {
11072 fprintf(stderr, format: "Ran out of bytes while reading first 4 bytes.\n");
11073 return false;
11074 }
11075 uint32_t cookie;
11076 memcpy(dest: &cookie, src: buf, n: sizeof(int32_t));
11077 buf += sizeof(uint32_t);
11078 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
11079 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
11080 fprintf(stderr, format: "I failed to find one of the right cookies. Found %" PRIu32 "\n",
11081 cookie);
11082 return false;
11083 }
11084 int32_t size;
11085
11086 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
11087 size = (cookie >> 16) + 1;
11088 else {
11089 *readbytes += sizeof(int32_t);
11090 if(*readbytes > maxbytes) {
11091 fprintf(stderr, format: "Ran out of bytes while reading second part of the cookie.\n");
11092 return false;
11093 }
11094 memcpy(dest: &size, src: buf, n: sizeof(int32_t));
11095 buf += sizeof(uint32_t);
11096 }
11097 if (size > (1<<16)) {
11098 fprintf(stderr, format: "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
11099 size);
11100 return false; // logically impossible
11101 }
11102 const char *bitmapOfRunContainers = NULL;
11103 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
11104 if (hasrun) {
11105 int32_t s = (size + 7) / 8;
11106 *readbytes += s;
11107 if(*readbytes > maxbytes) {// data is corrupted?
11108 fprintf(stderr, format: "Ran out of bytes while reading run bitmap.\n");
11109 return false;
11110 }
11111 bitmapOfRunContainers = buf;
11112 buf += s;
11113 }
11114 uint16_t *keyscards = (uint16_t *)buf;
11115
11116 *readbytes += size * 2 * sizeof(uint16_t);
11117 if(*readbytes > maxbytes) {
11118 fprintf(stderr, format: "Ran out of bytes while reading key-cardinality array.\n");
11119 return false;
11120 }
11121 buf += size * 2 * sizeof(uint16_t);
11122
11123 bool is_ok = ra_init_with_capacity(new_ra: answer, cap: size);
11124 if (!is_ok) {
11125 fprintf(stderr, format: "Failed to allocate memory for roaring array. Bailing out.\n");
11126 return false;
11127 }
11128
11129 for (int32_t k = 0; k < size; ++k) {
11130 uint16_t tmp;
11131 memcpy(dest: &tmp, src: keyscards + 2*k, n: sizeof(tmp));
11132 answer->keys[k] = tmp;
11133 }
11134 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
11135 *readbytes += size * 4;
11136 if(*readbytes > maxbytes) {// data is corrupted?
11137 fprintf(stderr, format: "Ran out of bytes while reading offsets.\n");
11138 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11139 return false;
11140 }
11141
11142 // skipping the offsets
11143 buf += size * 4;
11144 }
11145 // Reading the containers
11146 for (int32_t k = 0; k < size; ++k) {
11147 uint16_t tmp;
11148 memcpy(dest: &tmp, src: keyscards + 2*k+1, n: sizeof(tmp));
11149 uint32_t thiscard = tmp + 1;
11150 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
11151 bool isrun = false;
11152 if(hasrun) {
11153 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
11154 isbitmap = false;
11155 isrun = true;
11156 }
11157 }
11158 if (isbitmap) {
11159 // we check that the read is allowed
11160 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
11161 *readbytes += containersize;
11162 if(*readbytes > maxbytes) {
11163 fprintf(stderr, format: "Running out of bytes while reading a bitset container.\n");
11164 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11165 return false;
11166 }
11167 // it is now safe to read
11168 bitset_container_t *c = bitset_container_create();
11169 if(c == NULL) {// memory allocation failure
11170 fprintf(stderr, format: "Failed to allocate memory for a bitset container.\n");
11171 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11172 return false;
11173 }
11174 answer->size++;
11175 buf += bitset_container_read(cardinality: thiscard, container: c, buf);
11176 answer->containers[k] = c;
11177 answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE;
11178 } else if (isrun) {
11179 // we check that the read is allowed
11180 *readbytes += sizeof(uint16_t);
11181 if(*readbytes > maxbytes) {
11182 fprintf(stderr, format: "Running out of bytes while reading a run container (header).\n");
11183 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11184 return false;
11185 }
11186 uint16_t n_runs;
11187 memcpy(dest: &n_runs, src: buf, n: sizeof(uint16_t));
11188 size_t containersize = n_runs * sizeof(rle16_t);
11189 *readbytes += containersize;
11190 if(*readbytes > maxbytes) {// data is corrupted?
11191 fprintf(stderr, format: "Running out of bytes while reading a run container.\n");
11192 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11193 return false;
11194 }
11195 // it is now safe to read
11196
11197 run_container_t *c = run_container_create();
11198 if(c == NULL) {// memory allocation failure
11199 fprintf(stderr, format: "Failed to allocate memory for a run container.\n");
11200 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11201 return false;
11202 }
11203 answer->size++;
11204 buf += run_container_read(cardinality: thiscard, container: c, buf);
11205 answer->containers[k] = c;
11206 answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE;
11207 } else {
11208 // we check that the read is allowed
11209 size_t containersize = thiscard * sizeof(uint16_t);
11210 *readbytes += containersize;
11211 if(*readbytes > maxbytes) {// data is corrupted?
11212 fprintf(stderr, format: "Running out of bytes while reading an array container.\n");
11213 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11214 return false;
11215 }
11216 // it is now safe to read
11217 array_container_t *c =
11218 array_container_create_given_capacity(size: thiscard);
11219 if(c == NULL) {// memory allocation failure
11220 fprintf(stderr, format: "Failed to allocate memory for an array container.\n");
11221 ra_clear(ra: answer);// we need to clear the containers already allocated, and the roaring array
11222 return false;
11223 }
11224 answer->size++;
11225 buf += array_container_read(cardinality: thiscard, container: c, buf);
11226 answer->containers[k] = c;
11227 answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE;
11228 }
11229 }
11230 return true;
11231}
11232/* end file src/roaring_array.c */
11233/* begin file src/roaring_priority_queue.c */
11234
11235struct roaring_pq_element_s {
11236 uint64_t size;
11237 bool is_temporary;
11238 roaring_bitmap_t *bitmap;
11239};
11240
11241typedef struct roaring_pq_element_s roaring_pq_element_t;
11242
11243struct roaring_pq_s {
11244 roaring_pq_element_t *elements;
11245 uint64_t size;
11246};
11247
11248typedef struct roaring_pq_s roaring_pq_t;
11249
11250static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) {
11251 return t1->size < t2->size;
11252}
11253
11254static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) {
11255 uint64_t i = pq->size;
11256 pq->elements[pq->size++] = *t;
11257 while (i > 0) {
11258 uint64_t p = (i - 1) >> 1;
11259 roaring_pq_element_t ap = pq->elements[p];
11260 if (!compare(t1: t, t2: &ap)) break;
11261 pq->elements[i] = ap;
11262 i = p;
11263 }
11264 pq->elements[i] = *t;
11265}
11266
11267static void pq_free(roaring_pq_t *pq) {
11268 free(ptr: pq->elements);
11269 pq->elements = NULL; // paranoid
11270 free(ptr: pq);
11271}
11272
11273static void percolate_down(roaring_pq_t *pq, uint32_t i) {
11274 uint32_t size = (uint32_t)pq->size;
11275 uint32_t hsize = size >> 1;
11276 roaring_pq_element_t ai = pq->elements[i];
11277 while (i < hsize) {
11278 uint32_t l = (i << 1) + 1;
11279 uint32_t r = l + 1;
11280 roaring_pq_element_t bestc = pq->elements[l];
11281 if (r < size) {
11282 if (compare(t1: pq->elements + r, t2: &bestc)) {
11283 l = r;
11284 bestc = pq->elements[r];
11285 }
11286 }
11287 if (!compare(t1: &bestc, t2: &ai)) {
11288 break;
11289 }
11290 pq->elements[i] = bestc;
11291 i = l;
11292 }
11293 pq->elements[i] = ai;
11294}
11295
11296static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) {
11297 roaring_pq_t *answer = (roaring_pq_t *)malloc(size: sizeof(roaring_pq_t));
11298 answer->elements =
11299 (roaring_pq_element_t *)malloc(size: sizeof(roaring_pq_element_t) * length);
11300 answer->size = length;
11301 for (uint32_t i = 0; i < length; i++) {
11302 answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i];
11303 answer->elements[i].is_temporary = false;
11304 answer->elements[i].size =
11305 roaring_bitmap_portable_size_in_bytes(ra: arr[i]);
11306 }
11307 for (int32_t i = (length >> 1); i >= 0; i--) {
11308 percolate_down(pq: answer, i);
11309 }
11310 return answer;
11311}
11312
11313static roaring_pq_element_t pq_poll(roaring_pq_t *pq) {
11314 roaring_pq_element_t ans = *pq->elements;
11315 if (pq->size > 1) {
11316 pq->elements[0] = pq->elements[--pq->size];
11317 percolate_down(pq, i: 0);
11318 } else
11319 --pq->size;
11320 // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size;
11321 return ans;
11322}
11323
11324// this function consumes and frees the inputs
11325static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
11326 roaring_bitmap_t *x2) {
11327 uint8_t container_result_type = 0;
11328 const int length1 = ra_get_size(ra: &x1->high_low_container),
11329 length2 = ra_get_size(ra: &x2->high_low_container);
11330 if (0 == length1) {
11331 roaring_bitmap_free(r: x1);
11332 return x2;
11333 }
11334 if (0 == length2) {
11335 roaring_bitmap_free(r: x2);
11336 return x1;
11337 }
11338 uint32_t neededcap = length1 > length2 ? length2 : length1;
11339 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(cap: neededcap);
11340 int pos1 = 0, pos2 = 0;
11341 uint8_t container_type_1, container_type_2;
11342 uint16_t s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
11343 uint16_t s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
11344 while (true) {
11345 if (s1 == s2) {
11346 // todo: unsharing can be inefficient as it may create a clone where
11347 // none
11348 // is needed, but it has the benefit of being easy to reason about.
11349 ra_unshare_container_at_index(ra: &x1->high_low_container, i: pos1);
11350 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
11351 typecode: &container_type_1);
11352 assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE);
11353 ra_unshare_container_at_index(ra: &x2->high_low_container, i: pos2);
11354 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
11355 typecode: &container_type_2);
11356 assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE);
11357 void *c;
11358
11359 if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) &&
11360 (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) {
11361 c = container_lazy_ior(c1: c2, type1: container_type_2, c2: c1,
11362 type2: container_type_1,
11363 result_type: &container_result_type);
11364 container_free(container: c1, typecode: container_type_1);
11365 if (c != c2) {
11366 container_free(container: c2, typecode: container_type_2);
11367 }
11368 } else {
11369 c = container_lazy_ior(c1, type1: container_type_1, c2,
11370 type2: container_type_2,
11371 result_type: &container_result_type);
11372 container_free(container: c2, typecode: container_type_2);
11373 if (c != c1) {
11374 container_free(container: c1, typecode: container_type_1);
11375 }
11376 }
11377 // since we assume that the initial containers are non-empty, the
11378 // result here
11379 // can only be non-empty
11380 ra_append(ra: &answer->high_low_container, key: s1, container: c,
11381 typecode: container_result_type);
11382 ++pos1;
11383 ++pos2;
11384 if (pos1 == length1) break;
11385 if (pos2 == length2) break;
11386 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
11387 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
11388
11389 } else if (s1 < s2) { // s1 < s2
11390 void *c1 = ra_get_container_at_index(ra: &x1->high_low_container, i: pos1,
11391 typecode: &container_type_1);
11392 ra_append(ra: &answer->high_low_container, key: s1, container: c1, typecode: container_type_1);
11393 pos1++;
11394 if (pos1 == length1) break;
11395 s1 = ra_get_key_at_index(ra: &x1->high_low_container, i: pos1);
11396
11397 } else { // s1 > s2
11398 void *c2 = ra_get_container_at_index(ra: &x2->high_low_container, i: pos2,
11399 typecode: &container_type_2);
11400 ra_append(ra: &answer->high_low_container, key: s2, container: c2, typecode: container_type_2);
11401 pos2++;
11402 if (pos2 == length2) break;
11403 s2 = ra_get_key_at_index(ra: &x2->high_low_container, i: pos2);
11404 }
11405 }
11406 if (pos1 == length1) {
11407 ra_append_move_range(ra: &answer->high_low_container,
11408 sa: &x2->high_low_container, start_index: pos2, end_index: length2);
11409 } else if (pos2 == length2) {
11410 ra_append_move_range(ra: &answer->high_low_container,
11411 sa: &x1->high_low_container, start_index: pos1, end_index: length1);
11412 }
11413 ra_clear_without_containers(ra: &x1->high_low_container);
11414 ra_clear_without_containers(ra: &x2->high_low_container);
11415 free(ptr: x1);
11416 free(ptr: x2);
11417 return answer;
11418}
11419
11420/**
11421 * Compute the union of 'number' bitmaps using a heap. This can
11422 * sometimes be faster than roaring_bitmap_or_many which uses
11423 * a naive algorithm. Caller is responsible for freeing the
11424 * result.
11425 */
11426roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
11427 const roaring_bitmap_t **x) {
11428 if (number == 0) {
11429 return roaring_bitmap_create();
11430 }
11431 if (number == 1) {
11432 return roaring_bitmap_copy(r: x[0]);
11433 }
11434 roaring_pq_t *pq = create_pq(arr: x, length: number);
11435 while (pq->size > 1) {
11436 roaring_pq_element_t x1 = pq_poll(pq);
11437 roaring_pq_element_t x2 = pq_poll(pq);
11438
11439 if (x1.is_temporary && x2.is_temporary) {
11440 roaring_bitmap_t *newb =
11441 lazy_or_from_lazy_inputs(x1: x1.bitmap, x2: x2.bitmap);
11442 // should normally return a fresh new bitmap *except* that
11443 // it can return x1.bitmap or x2.bitmap in degenerate cases
11444 bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap));
11445 uint64_t bsize = roaring_bitmap_portable_size_in_bytes(ra: newb);
11446 roaring_pq_element_t newelement = {
11447 .size = bsize, .is_temporary = temporary, .bitmap = newb};
11448 pq_add(pq, t: &newelement);
11449 } else if (x2.is_temporary) {
11450 roaring_bitmap_lazy_or_inplace(x1: x2.bitmap, x2: x1.bitmap, false);
11451 x2.size = roaring_bitmap_portable_size_in_bytes(ra: x2.bitmap);
11452 pq_add(pq, t: &x2);
11453 } else if (x1.is_temporary) {
11454 roaring_bitmap_lazy_or_inplace(x1: x1.bitmap, x2: x2.bitmap, false);
11455 x1.size = roaring_bitmap_portable_size_in_bytes(ra: x1.bitmap);
11456
11457 pq_add(pq, t: &x1);
11458 } else {
11459 roaring_bitmap_t *newb =
11460 roaring_bitmap_lazy_or(x1: x1.bitmap, x2: x2.bitmap, false);
11461 uint64_t bsize = roaring_bitmap_portable_size_in_bytes(ra: newb);
11462 roaring_pq_element_t newelement = {
11463 .size = bsize, .is_temporary = true, .bitmap = newb};
11464
11465 pq_add(pq, t: &newelement);
11466 }
11467 }
11468 roaring_pq_element_t X = pq_poll(pq);
11469 roaring_bitmap_t *answer = X.bitmap;
11470 roaring_bitmap_repair_after_lazy(ra: answer);
11471 pq_free(pq);
11472 return answer;
11473}
11474/* end file src/roaring_priority_queue.c */
11475

source code of gtk/gtk/roaring/roaring.c