1 | //! Trailing Bit Manipulation (TBM) instruction set. |
2 | //! |
3 | //! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: |
4 | //! General-Purpose and System Instructions][amd64_ref]. |
5 | //! |
6 | //! [Wikipedia][wikipedia_bmi] provides a quick overview of the available |
7 | //! instructions. |
8 | //! |
9 | //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf |
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#TBM_(Trailing_Bit_Manipulation)
12 | |
13 | #[cfg (test)] |
14 | use stdarch_test::assert_instr; |
15 | |
16 | // FIXME(blocked on #248) |
17 | // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: |
18 | // intrinsic %llvm.x86.tbm.bextri.u32 |
19 | /* |
20 | #[allow(dead_code)] |
21 | extern "C" { |
22 | #[link_name="llvm.x86.tbm.bextri.u32"] |
23 | fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32; |
24 | #[link_name="llvm.x86.tbm.bextri.u64"] |
25 | fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64; |
26 | } |
27 | |
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
/// the least significant bits of the result.
30 | #[inline] |
31 | #[target_feature(enable = "tbm")] |
32 | pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { |
33 | _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) |
34 | } |
35 | |
/// Extracts bits in range [`start`, `start` + `len`) from `a` into
/// the least significant bits of the result.
38 | #[inline] |
39 | #[target_feature(enable = "tbm")] |
40 | pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { |
41 | _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) |
42 | } |
43 | |
44 | /// Extracts bits of `a` specified by `control` into |
45 | /// the least significant bits of the result. |
46 | /// |
47 | /// Bits `[7,0]` of `control` specify the index to the first bit in the range to |
48 | /// be extracted, and bits `[15,8]` specify the length of the range. |
49 | #[inline] |
50 | #[target_feature(enable = "tbm")] |
51 | pub fn _bextr2_u32(a: u32, control: u32) -> u32 { |
52 | unsafe { x86_tbm_bextri_u32(a, control) } |
53 | } |
54 | |
55 | /// Extracts bits of `a` specified by `control` into |
56 | /// the least significant bits of the result. |
57 | /// |
58 | /// Bits `[7,0]` of `control` specify the index to the first bit in the range to |
59 | /// be extracted, and bits `[15,8]` specify the length of the range. |
60 | #[inline] |
61 | #[target_feature(enable = "tbm")] |
62 | pub fn _bextr2_u64(a: u64, control: u64) -> u64 { |
63 | unsafe { x86_tbm_bextri_u64(a, control) } |
64 | } |
65 | */ |
66 | |
67 | /// Clears all bits below the least significant zero bit of `x`. |
68 | /// |
69 | /// If there is no zero bit in `x`, it returns zero. |
70 | #[inline ] |
71 | #[target_feature (enable = "tbm" )] |
72 | #[cfg_attr (test, assert_instr(blcfill))] |
73 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
74 | pub unsafe fn _blcfill_u32(x: u32) -> u32 { |
75 | x & (x.wrapping_add(1)) |
76 | } |
77 | |
78 | /// Clears all bits below the least significant zero bit of `x`. |
79 | /// |
80 | /// If there is no zero bit in `x`, it returns zero. |
81 | #[inline ] |
82 | #[target_feature (enable = "tbm" )] |
83 | #[cfg_attr (test, assert_instr(blcfill))] |
84 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
85 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
86 | pub unsafe fn _blcfill_u64(x: u64) -> u64 { |
87 | x & (x.wrapping_add(1)) |
88 | } |
89 | |
90 | /// Sets all bits of `x` to 1 except for the least significant zero bit. |
91 | /// |
92 | /// If there is no zero bit in `x`, it sets all bits. |
93 | #[inline ] |
94 | #[target_feature (enable = "tbm" )] |
95 | #[cfg_attr (test, assert_instr(blci))] |
96 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
97 | pub unsafe fn _blci_u32(x: u32) -> u32 { |
98 | x | !(x.wrapping_add(1)) |
99 | } |
100 | |
101 | /// Sets all bits of `x` to 1 except for the least significant zero bit. |
102 | /// |
103 | /// If there is no zero bit in `x`, it sets all bits. |
104 | #[inline ] |
105 | #[target_feature (enable = "tbm" )] |
106 | #[cfg_attr (test, assert_instr(blci))] |
107 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
108 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
109 | pub unsafe fn _blci_u64(x: u64) -> u64 { |
110 | x | !(x.wrapping_add(1)) |
111 | } |
112 | |
113 | /// Sets the least significant zero bit of `x` and clears all other bits. |
114 | /// |
115 | /// If there is no zero bit in `x`, it returns zero. |
116 | #[inline ] |
117 | #[target_feature (enable = "tbm" )] |
118 | #[cfg_attr (test, assert_instr(blcic))] |
119 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
120 | pub unsafe fn _blcic_u32(x: u32) -> u32 { |
121 | !x & (x.wrapping_add(1)) |
122 | } |
123 | |
124 | /// Sets the least significant zero bit of `x` and clears all other bits. |
125 | /// |
126 | /// If there is no zero bit in `x`, it returns zero. |
127 | #[inline ] |
128 | #[target_feature (enable = "tbm" )] |
129 | #[cfg_attr (test, assert_instr(blcic))] |
130 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
131 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
132 | pub unsafe fn _blcic_u64(x: u64) -> u64 { |
133 | !x & (x.wrapping_add(1)) |
134 | } |
135 | |
136 | /// Sets the least significant zero bit of `x` and clears all bits above |
137 | /// that bit. |
138 | /// |
139 | /// If there is no zero bit in `x`, it sets all the bits. |
140 | #[inline ] |
141 | #[target_feature (enable = "tbm" )] |
142 | #[cfg_attr (test, assert_instr(blcmsk))] |
143 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
144 | pub unsafe fn _blcmsk_u32(x: u32) -> u32 { |
145 | x ^ (x.wrapping_add(1)) |
146 | } |
147 | |
148 | /// Sets the least significant zero bit of `x` and clears all bits above |
149 | /// that bit. |
150 | /// |
151 | /// If there is no zero bit in `x`, it sets all the bits. |
152 | #[inline ] |
153 | #[target_feature (enable = "tbm" )] |
154 | #[cfg_attr (test, assert_instr(blcmsk))] |
155 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
156 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
157 | pub unsafe fn _blcmsk_u64(x: u64) -> u64 { |
158 | x ^ (x.wrapping_add(1)) |
159 | } |
160 | |
161 | /// Sets the least significant zero bit of `x`. |
162 | /// |
163 | /// If there is no zero bit in `x`, it returns `x`. |
164 | #[inline ] |
165 | #[target_feature (enable = "tbm" )] |
166 | #[cfg_attr (test, assert_instr(blcs))] |
167 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
168 | pub unsafe fn _blcs_u32(x: u32) -> u32 { |
169 | x | (x.wrapping_add(1)) |
170 | } |
171 | |
172 | /// Sets the least significant zero bit of `x`. |
173 | /// |
174 | /// If there is no zero bit in `x`, it returns `x`. |
175 | #[inline ] |
176 | #[target_feature (enable = "tbm" )] |
177 | #[cfg_attr (test, assert_instr(blcs))] |
178 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
179 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
180 | pub unsafe fn _blcs_u64(x: u64) -> u64 { |
181 | x | x.wrapping_add(1) |
182 | } |
183 | |
184 | /// Sets all bits of `x` below the least significant one. |
185 | /// |
186 | /// If there is no set bit in `x`, it sets all the bits. |
187 | #[inline ] |
188 | #[target_feature (enable = "tbm" )] |
189 | #[cfg_attr (test, assert_instr(blsfill))] |
190 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
191 | pub unsafe fn _blsfill_u32(x: u32) -> u32 { |
192 | x | (x.wrapping_sub(1)) |
193 | } |
194 | |
195 | /// Sets all bits of `x` below the least significant one. |
196 | /// |
197 | /// If there is no set bit in `x`, it sets all the bits. |
198 | #[inline ] |
199 | #[target_feature (enable = "tbm" )] |
200 | #[cfg_attr (test, assert_instr(blsfill))] |
201 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
202 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
203 | pub unsafe fn _blsfill_u64(x: u64) -> u64 { |
204 | x | (x.wrapping_sub(1)) |
205 | } |
206 | |
207 | /// Clears least significant bit and sets all other bits. |
208 | /// |
209 | /// If there is no set bit in `x`, it sets all the bits. |
210 | #[inline ] |
211 | #[target_feature (enable = "tbm" )] |
212 | #[cfg_attr (test, assert_instr(blsic))] |
213 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
214 | pub unsafe fn _blsic_u32(x: u32) -> u32 { |
215 | !x | (x.wrapping_sub(1)) |
216 | } |
217 | |
218 | /// Clears least significant bit and sets all other bits. |
219 | /// |
220 | /// If there is no set bit in `x`, it sets all the bits. |
221 | #[inline ] |
222 | #[target_feature (enable = "tbm" )] |
223 | #[cfg_attr (test, assert_instr(blsic))] |
224 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
225 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
226 | pub unsafe fn _blsic_u64(x: u64) -> u64 { |
227 | !x | (x.wrapping_sub(1)) |
228 | } |
229 | |
230 | /// Clears all bits below the least significant zero of `x` and sets all other |
231 | /// bits. |
232 | /// |
233 | /// If the least significant bit of `x` is `0`, it sets all bits. |
234 | #[inline ] |
235 | #[target_feature (enable = "tbm" )] |
236 | #[cfg_attr (test, assert_instr(t1mskc))] |
237 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
238 | pub unsafe fn _t1mskc_u32(x: u32) -> u32 { |
239 | !x | (x.wrapping_add(1)) |
240 | } |
241 | |
242 | /// Clears all bits below the least significant zero of `x` and sets all other |
243 | /// bits. |
244 | /// |
245 | /// If the least significant bit of `x` is `0`, it sets all bits. |
246 | #[inline ] |
247 | #[target_feature (enable = "tbm" )] |
248 | #[cfg_attr (test, assert_instr(t1mskc))] |
249 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
250 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
251 | pub unsafe fn _t1mskc_u64(x: u64) -> u64 { |
252 | !x | (x.wrapping_add(1)) |
253 | } |
254 | |
255 | /// Sets all bits below the least significant one of `x` and clears all other |
256 | /// bits. |
257 | /// |
258 | /// If the least significant bit of `x` is 1, it returns zero. |
259 | #[inline ] |
260 | #[target_feature (enable = "tbm" )] |
261 | #[cfg_attr (test, assert_instr(tzmsk))] |
262 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
263 | pub unsafe fn _tzmsk_u32(x: u32) -> u32 { |
264 | !x & (x.wrapping_sub(1)) |
265 | } |
266 | |
267 | /// Sets all bits below the least significant one of `x` and clears all other |
268 | /// bits. |
269 | /// |
270 | /// If the least significant bit of `x` is 1, it returns zero. |
271 | #[inline ] |
272 | #[target_feature (enable = "tbm" )] |
273 | #[cfg_attr (test, assert_instr(tzmsk))] |
274 | #[cfg (not(target_arch = "x86" ))] // generates lots of instructions |
275 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
276 | pub unsafe fn _tzmsk_u64(x: u64) -> u64 { |
277 | !x & (x.wrapping_sub(1)) |
278 | } |
279 | |
// Unit tests for the TBM intrinsics. Each test is declared through
// `simd_test` with `enable = "tbm"`. The 64-bit variants are compiled out on
// 32-bit x86, mirroring the `cfg` on the intrinsics themselves.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Disabled together with the commented-out `_bextr*` intrinsics.
    /*
    #[simd_test(enable = "tbm")]
    unsafe fn test_bextr_u32() {
        assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_bextr_u64() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
    }
    */

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcfill_u32() {
        assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
        assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blcfill_u64() {
        assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
        assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blci_u32() {
        // Every bit set except the position of the lowest zero bit of the input.
        assert_eq!(_blci_u32(0b0101_0000u32), !0b0001u32);
        assert_eq!(_blci_u32(0b1111_1111u32), !0b1_0000_0000u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blci_u64() {
        // Every bit set except the position of the lowest zero bit of the input.
        assert_eq!(_blci_u64(0b0101_0000u64), !0b0001u64);
        assert_eq!(_blci_u64(0b1111_1111u64), !0b1_0000_0000u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcic_u32() {
        assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
        assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blcic_u64() {
        assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
        assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcmsk_u32() {
        assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
        assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blcmsk_u64() {
        assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
        assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blcs_u32() {
        assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
        assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blcs_u64() {
        assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
        assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blsfill_u32() {
        assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
        assert_eq!(_blsfill_u32(0u32), u32::MAX);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blsfill_u64() {
        assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
        assert_eq!(_blsfill_u64(0u64), u64::MAX);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_blsic_u32() {
        // Only the lowest set bit of the input (bit 2) ends up cleared.
        assert_eq!(_blsic_u32(0b0101_0100u32), !0b0100u32);
        assert_eq!(_blsic_u32(0u32), u32::MAX);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_blsic_u64() {
        // Only the lowest set bit of the input (bit 2) ends up cleared.
        assert_eq!(_blsic_u64(0b0101_0100u64), !0b0100u64);
        assert_eq!(_blsic_u64(0u64), u64::MAX);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_t1mskc_u32() {
        // The three trailing ones of the input are cleared, everything else set.
        assert_eq!(_t1mskc_u32(0b0101_0111u32), !0b0111u32);
        assert_eq!(_t1mskc_u32(0u32), u32::MAX);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_t1mskc_u64() {
        // The three trailing ones of the input are cleared, everything else set.
        assert_eq!(_t1mskc_u64(0b0101_0111u64), !0b0111u64);
        assert_eq!(_t1mskc_u64(0u64), u64::MAX);
    }

    #[simd_test(enable = "tbm")]
    unsafe fn test_tzmsk_u32() {
        assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
        assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
    }

    #[simd_test(enable = "tbm")]
    #[cfg(not(target_arch = "x86"))]
    unsafe fn test_tzmsk_u64() {
        assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
        assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
    }
}
461 | |