//! Carry-less Multiplication (CLMUL)
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241).
//!
//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf

use crate::core_arch::x86::__m128i;

#[cfg(test)]
use stdarch_test::assert_instr;

13#[allow(improper_ctypes)]
14unsafe extern "C" {
15 #[link_name = "llvm.x86.pclmulqdq"]
16 unsafefn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i;
17}
18
19/// Performs a carry-less multiplication of two 64-bit polynomials over the
20/// finite field GF(2).
21///
22/// The immediate byte is used for determining which halves of `a` and `b`
23/// should be used. Immediate bits other than 0 and 4 are ignored.
24///
25/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128)
26#[inline]
27#[target_feature(enable = "pclmulqdq")]
28#[cfg_attr(test, assert_instr(pclmul, IMM8 = 0))]
29#[rustc_legacy_const_generics(2)]
30#[stable(feature = "simd_x86", since = "1.27.0")]
31pub fn _mm_clmulepi64_si128<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
32 static_assert_uimm_bits!(IMM8, 8);
33 unsafe { pclmulqdq(a, round_key:b, IMM8 as u8) }
34}
35
#[cfg(test)]
mod tests {
    // Every literal below is a raw bit pattern, not a number: `__m128i` is
    // merely expressed through signed integers, so literals with the top bit
    // set would otherwise trip the overflow lint.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "pclmulqdq")]
    unsafe fn test_mm_clmulepi64_si128() {
        // Test vectors from Intel's carry-less multiplication white paper:
        // https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
        let lhs = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
        let rhs = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);

        // One check per combination of the two selector bits of the immediate.
        assert_eq_m128i(
            _mm_clmulepi64_si128::<0x00>(lhs, rhs),
            _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451),
        );
        assert_eq_m128i(
            _mm_clmulepi64_si128::<0x10>(lhs, rhs),
            _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315),
        );
        assert_eq_m128i(
            _mm_clmulepi64_si128::<0x01>(lhs, rhs),
            _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9),
        );
        assert_eq_m128i(
            _mm_clmulepi64_si128::<0x11>(lhs, rhs),
            _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed),
        );

        // Squaring x^63 (only the top bit of the low half set) must give
        // x^126, i.e. a single bit in the upper half of the product.
        let top_bit = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
        let squared = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(top_bit, top_bit), squared);
    }
}
