use crate::bytes;
use crate::crc32_table::{TABLE, TABLE16};

/// Provides a simple API to generate "masked" CRC32C checksums specifically
/// for use in Snappy. When available, this will make use of SSE 4.2 to compute
/// checksums. Otherwise, it falls back to the only-marginally-slower "slicing
/// by 16" technique.
///
/// The main purpose of this type is to cache the CPU feature check and expose
/// a safe API.
#[derive(Clone, Copy, Debug)]
pub struct CheckSummer {
    sse42: bool,
}

impl CheckSummer {
    /// Create a new checksummer that can compute CRC32C checksums on
    /// arbitrary bytes.
    #[cfg(not(target_arch = "x86_64"))]
    pub fn new() -> CheckSummer {
        CheckSummer { sse42: false }
    }

    /// Create a new checksummer that can compute CRC32C checksums on
    /// arbitrary bytes.
    #[cfg(target_arch = "x86_64")]
    pub fn new() -> CheckSummer {
        CheckSummer { sse42: is_x86_feature_detected!("sse4.2") }
    }

    /// Returns the "masked" CRC32 checksum of `buf` using the Castagnoli
    /// polynomial. This "masked" checksum is defined by the Snappy frame
    /// format. Masking is supposed to make the checksum robust with respect
    /// to the data that contains the checksum itself.
    pub fn crc32c_masked(&self, buf: &[u8]) -> u32 {
        let sum = self.crc32c(buf);
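        // Rotate the sum right by 15 bits (`shr 15 | shl 17` on a u32) and
        // add the masking constant defined by the Snappy frame format.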
        (sum.wrapping_shr(15) | sum.wrapping_shl(17)).wrapping_add(0xA282EAD8)
    }

    /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
    #[cfg(not(target_arch = "x86_64"))]
    fn crc32c(&self, buf: &[u8]) -> u32 {
        crc32c_slice16(buf)
    }

    /// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
    #[cfg(target_arch = "x86_64")]
    fn crc32c(&self, buf: &[u8]) -> u32 {
        if self.sse42 {
            // SAFETY: When sse42 is true, we are guaranteed to be running on
            // a CPU that supports SSE 4.2.
            unsafe { crc32c_sse(buf) }
        } else {
            crc32c_slice16(buf)
        }
    }
}

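/// Computes the CRC32C of `buf` using the SSE 4.2 `crc32` instructions,
/// feeding the aligned middle of the buffer to the hardware eight bytes at a
/// time and handling the unaligned prefix and suffix one byte at a time.
///
/// # Safety
///
/// Callers must ensure the running CPU supports SSE 4.2.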
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32c_sse(buf: &[u8]) -> u32 {
    use std::arch::x86_64::*;

    let mut crc: u32 = !0u32;
    // SAFETY: This is safe since alignment is handled by align_to (oh how I
    // love you) and since 8 adjacent u8's are guaranteed to have the same
    // in-memory representation as u64 for all possible values.
    let (prefix, u64s, suffix) = buf.align_to::<u64>();
    for &b in prefix {
        // SAFETY: Safe since we have sse4.2 enabled.
        crc = _mm_crc32_u8(crc, b);
    }
    for &n in u64s {
        // SAFETY: Safe since we have sse4.2 enabled.
        crc = _mm_crc32_u64(crc as u64, n) as u32;
    }
    for &b in suffix {
        // SAFETY: Safe since we have sse4.2 enabled.
        crc = _mm_crc32_u8(crc, b);
    }
    !crc
}

/// Returns the CRC32 checksum of `buf` using the Castagnoli polynomial.
fn crc32c_slice16(mut buf: &[u8]) -> u32 {
    let mut crc: u32 = !0;
    while buf.len() >= 16 {
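        // "Slicing by 16": fold the current CRC into the first four bytes,
        // then consume 16 bytes at once with one table lookup per byte,
        // XORed together.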
        crc ^= bytes::read_u32_le(buf);
        crc = TABLE16[0][buf[15] as usize]
            ^ TABLE16[1][buf[14] as usize]
            ^ TABLE16[2][buf[13] as usize]
            ^ TABLE16[3][buf[12] as usize]
            ^ TABLE16[4][buf[11] as usize]
            ^ TABLE16[5][buf[10] as usize]
            ^ TABLE16[6][buf[9] as usize]
            ^ TABLE16[7][buf[8] as usize]
            ^ TABLE16[8][buf[7] as usize]
            ^ TABLE16[9][buf[6] as usize]
            ^ TABLE16[10][buf[5] as usize]
            ^ TABLE16[11][buf[4] as usize]
            ^ TABLE16[12][(crc >> 24) as u8 as usize]
            ^ TABLE16[13][(crc >> 16) as u8 as usize]
            ^ TABLE16[14][(crc >> 8) as u8 as usize]
            ^ TABLE16[15][(crc) as u8 as usize];
        buf = &buf[16..];
    }
    for &b in buf {
        crc = TABLE[((crc as u8) ^ b) as usize] ^ (crc >> 8);
    }
    !crc
}
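
// A minimal sanity-check sketch (module and test names are illustrative): it
// asserts the standard CRC-32C check value for "123456789" (0xE3069283) and
// the masked checksum of an empty buffer, and exercises the slicing-by-16
// fallback directly so it is covered even when the SSE 4.2 path is taken.
#[cfg(test)]
mod tests {
    use super::{crc32c_slice16, CheckSummer};

    #[test]
    fn crc32c_check_value() {
        // "123456789" is the conventional check input for CRC-32C.
        let sum = CheckSummer::new();
        assert_eq!(0xE3069283, sum.crc32c(b"123456789"));
    }

    #[test]
    fn slice16_matches_check_value() {
        // Call the fallback directly, bypassing CPU feature detection.
        assert_eq!(0xE3069283, crc32c_slice16(b"123456789"));
    }

    #[test]
    fn masked_empty_is_constant() {
        // The CRC-32C of an empty buffer is 0, so masking it yields the
        // Snappy masking constant unchanged.
        let sum = CheckSummer::new();
        assert_eq!(0xA282EAD8, sum.crc32c_masked(b""));
    }
}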