| 1 | //! Implementations for `uN::extract_bits` and `uN::deposit_bits` |
| 2 | //! |
| 3 | //! For the purposes of this implementation, the operations can be thought |
| 4 | //! of as operating on the input bits as a list, starting from the least |
| 5 | //! significant bit. Extraction is like `Vec::retain` that deletes bits |
| 6 | //! where the mask has a zero. Deposition is like doing the inverse by |
| 7 | //! inserting the zeros that extraction would delete. |
| 8 | //! |
| 9 | //! Key observation: Each extracted or deposited bit needs to be |
| 10 | //! shifted by the count of zeros up to the corresponding mask bit. |
| 11 | //! |
| 12 | //! With that in mind, the general idea is to decompose the operation into |
| 13 | //! a sequence of stages in `0..log2(BITS)`, where each stage shifts some |
| 14 | //! of the bits by `n = 1 << stage`. The masks for each stage are computed |
| 15 | //! via prefix counts of zeros in the mask. |
| 16 | //! |
| 17 | //! # Extraction |
| 18 | //! |
| 19 | //! Consider the input as a sequence of runs of data (bitstrings A,B,C,...), |
| 20 | //! split by fixed-width groups of zeros ('.'), initially at width `n = 1`. |
| 21 | //! Counting the groups of zeros, each stage shifts the odd-indexed runs of |
| 22 | //! data right by `n`, effectively swapping them with the preceding zeros. |
| 23 | //! For the next stage, `n` is doubled as all the zeros are now paired. |
| 24 | //! ```text |
| 25 | //! .A.B.C.D.E.F.G.H |
| 26 | //! ..AB..CD..EF..GH |
| 27 | //! ....ABCD....EFGH |
| 28 | //! ........ABCDEFGH |
| 29 | //! ``` |
| 30 | //! What makes this nontrivial is that the lengths of the bitstrings are not |
| 31 | //! the same. Using lowercase for individual bits, the above might look like |
| 32 | //! ```text |
| 33 | //! .a.bbb.ccccc.dd.e..g.hh |
| 34 | //! ..abbb..cccccdd..e..ghh |
| 35 | //! ....abbbcccccdd....eghh |
| 36 | //! ........abbbcccccddeghh |
| 37 | //! ``` |
| 38 | //! |
| 39 | //! # Deposition |
| 40 | //! |
| 41 | //! For `deposit_bits`, the stages are reversed. We start with a single run of |
| 42 | //! data in the low bits. Each stage then splits each run of data in two by |
| 43 | //! shifting part of it left by `n`, which is halved each stage. |
| 44 | //! ```text |
| 45 | //! ........ABCDEFGH |
| 46 | //! ....ABCD....EFGH |
| 47 | //! ..AB..CD..EF..GH |
| 48 | //! .A.B.C.D.E.F.G.H |
| 49 | //! ``` |
| 50 | //! |
| 51 | //! # Stage masks |
| 52 | //! |
| 53 | //! To facilitate the shifts at each stage, we compute a mask that covers both |
| 54 | //! the bitstrings to shift, and the zeros they shift into. |
| 55 | //! ```text |
| 56 | //! .A.B.C.D.E.F.G.H |
| 57 | //! ## ## ## ## |
| 58 | //! ..AB..CD..EF..GH |
| 59 | //! #### #### |
| 60 | //! ....ABCD....EFGH |
| 61 | //! ######## |
| 62 | //! ........ABCDEFGH |
| 63 | //! ``` |
| 64 | |
| 65 | macro_rules! uint_impl { |
| 66 | ($U:ident) => { |
| 67 | pub(super) mod $U { |
| 68 | const STAGES: usize = $U::BITS.ilog2() as usize; |
| 69 | #[inline] |
| 70 | const fn prepare(sparse: $U) -> [$U; STAGES] { |
| 71 | // We'll start with `zeros` as a mask of the bits to be removed, |
| 72 | // and compute into `masks` the parts that shift at each stage. |
| 73 | let mut zeros = !sparse; |
| 74 | let mut masks = [0; STAGES]; |
| 75 | let mut stage = 0; |
| 76 | while stage < STAGES { |
| 77 | let n = 1 << stage; |
| 78 | // Suppose `zeros` has bits set at ranges `{ a..a+n, b..b+n, ... }`. |
| 79 | // Then `parity` will be computed as `{ a.. } XOR { b.. } XOR ...`, |
| 80 | // which will be the ranges `{ a..b, c..d, e.. }`. |
| 81 | let mut parity = zeros; |
| 82 | let mut len = n; |
| 83 | while len < $U::BITS { |
| 84 | parity ^= parity << len; |
| 85 | len <<= 1; |
| 86 | } |
| 87 | masks[stage] = parity; |
| 88 | |
| 89 | // Toggle off the bits that are shifted into: |
| 90 | // { a..a+n, b..b+n, ... } & !{ a..b, c..d, e.. } |
| 91 | // == { b..b+n, d..d+n, ... } |
| 92 | zeros &= !parity; |
| 93 | // Expand the remaining ranges down to the bits that were |
| 94 | // shifted from: { b-n..b+n, d-n..d+n, ... } |
| 95 | zeros ^= zeros >> n; |
| 96 | |
| 97 | stage += 1; |
| 98 | } |
| 99 | masks |
| 100 | } |
| 101 | |
| 102 | #[inline(always)] |
| 103 | pub(in super::super) const fn extract_impl(mut x: $U, sparse: $U) -> $U { |
| 104 | let masks = prepare(sparse); |
| 105 | x &= sparse; |
| 106 | let mut stage = 0; |
| 107 | while stage < STAGES { |
| 108 | let n = 1 << stage; |
| 109 | // Consider each two runs of data with their leading |
| 110 | // groups of `n` 0-bits. Suppose that the run that is |
| 111 | // shifted right has length `a`, and the other one has |
| 112 | // length `b`. Assume that only zeros are shifted in. |
| 113 | // ```text |
| 114 | // [0; n], [X; a], [0; n], [Y; b] // x |
| 115 | // [0; n], [X; a], [0; n], [0; b] // q |
| 116 | // [0; n], [0; a + n], [Y; b] // x ^= q |
| 117 | // [0; n + n], [X; a], [0; b] // q >> n |
| 118 | // [0; n], [0; n], [X; a], [Y; b] // x ^= q << n |
| 119 | // ``` |
| 120 | // Only zeros are shifted out, satisfying the assumption |
| 121 | // for the next group. |
| 122 | |
| 123 | // In effect, the upper run of data is swapped with the |
| 124 | // group of `n` zeros below it. |
| 125 | let q = x & masks[stage]; |
| 126 | x ^= q; |
| 127 | x ^= q >> n; |
| 128 | |
| 129 | stage += 1; |
| 130 | } |
| 131 | x |
| 132 | } |
| 133 | #[inline(always)] |
| 134 | pub(in super::super) const fn deposit_impl(mut x: $U, sparse: $U) -> $U { |
| 135 | let masks = prepare(sparse); |
| 136 | let mut stage = STAGES; |
| 137 | while stage > 0 { |
| 138 | stage -= 1; |
| 139 | let n = 1 << stage; |
| 140 | // Consider each run of data with the `2 * n` arbitrary bits |
| 141 | // above it. Suppose that the run has length `a + b`, with |
| 142 | // `a` being the length of the part that needs to be |
| 143 | // shifted. Assume that only zeros are shifted in. |
| 144 | // ```text |
| 145 | // [_; n], [_; n], [X; a], [Y; b] // x |
| 146 | // [0; n], [_; n], [X; a], [0; b] // q |
| 147 | // [_; n], [0; n + a], [Y; b] // x ^= q |
| 148 | // [_; n], [X; a], [0; b + n] // q << n |
| 149 | // [_; n], [X; a], [0; n], [Y; b] // x ^= q << n |
| 150 | // ``` |
| 151 | // Only zeros are shifted out, satisfying the assumption |
| 152 | // for the next group. |
| 153 | |
| 154 | // In effect, `n` 0-bits are inserted somewhere in each run |
| 155 | // of data to spread it, and the two groups of `n` bits |
| 156 | // above are XOR'd together. |
| 157 | let q = x & masks[stage]; |
| 158 | x ^= q; |
| 159 | x ^= q << n; |
| 160 | } |
| 161 | x & sparse |
| 162 | } |
| 163 | } |
| 164 | }; |
| 165 | } |
| 166 | |
| 167 | uint_impl!(u8); |
| 168 | uint_impl!(u16); |
| 169 | uint_impl!(u32); |
| 170 | uint_impl!(u64); |
| 171 | uint_impl!(u128); |
| 172 | |