1 | //! Carry-less Multiplication (CLMUL) |
2 | //! |
3 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
4 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). |
5 | //! |
6 | //! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
7 | |
8 | use crate::core_arch::x86::__m128i; |
9 | |
10 | #[cfg (test)] |
11 | use stdarch_test::assert_instr; |
12 | |
13 | #[allow (improper_ctypes)] |
14 | extern "C" { |
15 | #[link_name = "llvm.x86.pclmulqdq" ] |
16 | fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; |
17 | } |
18 | |
19 | /// Performs a carry-less multiplication of two 64-bit polynomials over the |
20 | /// finite field GF(2). |
21 | /// |
22 | /// The immediate byte is used for determining which halves of `a` and `b` |
23 | /// should be used. Immediate bits other than 0 and 4 are ignored. |
24 | /// |
25 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128) |
26 | #[inline ] |
27 | #[target_feature (enable = "pclmulqdq" )] |
28 | #[cfg_attr (all(test, not(target_os = "linux" )), assert_instr(pclmulqdq, IMM8 = 0))] |
29 | #[cfg_attr (all(test, target_os = "linux" ), assert_instr(pclmullqlqdq, IMM8 = 0))] |
30 | #[cfg_attr (all(test, target_os = "linux" ), assert_instr(pclmulhqlqdq, IMM8 = 1))] |
31 | #[cfg_attr (all(test, target_os = "linux" ), assert_instr(pclmullqhqdq, IMM8 = 16))] |
32 | #[cfg_attr (all(test, target_os = "linux" ), assert_instr(pclmulhqhqdq, IMM8 = 17))] |
33 | #[rustc_legacy_const_generics (2)] |
34 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
35 | pub unsafe fn _mm_clmulepi64_si128<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
36 | static_assert_uimm_bits!(IMM8, 8); |
37 | pclmulqdq(a, round_key:b, IMM8 as u8) |
38 | } |
39 | |
#[cfg(test)]
mod tests {
    // Every literal in this module is a raw bit pattern, not a number. Sign
    // has no meaning for them; the lint is silenced only because `__m128i`
    // happens to be defined in terms of signed integers.
    #![allow(overflowing_literals)]

    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "pclmulqdq")]
    unsafe fn test_mm_clmulepi64_si128() {
        // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
        let lhs = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
        let rhs = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);

        // Expected products, named `<half of lhs>_<half of rhs>`.
        let lo_lo = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451);
        let lo_hi = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315);
        let hi_lo = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9);
        let hi_hi = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed);

        // Immediate bit 0 selects the half of `lhs`, bit 4 the half of `rhs`.
        assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(lhs, rhs), lo_lo);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x10>(lhs, rhs), lo_hi);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x01>(lhs, rhs), hi_lo);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x11>(lhs, rhs), hi_hi);

        // Squaring x^63 (top bit of the low half) must yield x^126, i.e.
        // bit 62 of the high half of the 128-bit result.
        let top_bit = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000);
        let squared = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000);
        assert_eq_m128i(_mm_clmulepi64_si128::<0x00>(top_bit, top_bit), squared);
    }
}
71 | |