1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #pragma once |
5 | |
6 | #include "../sys/alloc.h" |
7 | #include "math.h" |
8 | #include "../simd/sse.h" |
9 | |
10 | namespace embree |
11 | { |
12 | //////////////////////////////////////////////////////////////////////////////// |
13 | /// SSE Vec3ba Type |
14 | //////////////////////////////////////////////////////////////////////////////// |
15 | |
16 | struct __aligned(16) Vec3ba |
17 | { |
18 | ALIGNED_STRUCT_(16); |
19 | |
20 | union { |
21 | __m128 m128; |
22 | struct { int x,y,z; }; |
23 | }; |
24 | |
25 | typedef int Scalar; |
26 | enum { N = 3 }; |
27 | |
28 | //////////////////////////////////////////////////////////////////////////////// |
29 | /// Constructors, Assignment & Cast Operators |
30 | //////////////////////////////////////////////////////////////////////////////// |
31 | |
32 | __forceinline Vec3ba( ) {} |
33 | __forceinline Vec3ba( const __m128 input ) : m128(input) {} |
34 | __forceinline Vec3ba( const Vec3ba& other ) : m128(other.m128) {} |
35 | __forceinline Vec3ba& operator =(const Vec3ba& other) { m128 = other.m128; return *this; } |
36 | |
37 | __forceinline explicit Vec3ba( bool a ) |
38 | : m128(mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {} |
39 | __forceinline Vec3ba( bool a, bool b, bool c) |
40 | : m128(mm_lookupmask_ps[(size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {} |
41 | |
42 | __forceinline operator const __m128&() const { return m128; } |
43 | __forceinline operator __m128&() { return m128; } |
44 | |
45 | //////////////////////////////////////////////////////////////////////////////// |
46 | /// Constants |
47 | //////////////////////////////////////////////////////////////////////////////// |
48 | |
49 | __forceinline Vec3ba( FalseTy ) : m128(_mm_setzero_ps()) {} |
50 | __forceinline Vec3ba( TrueTy ) : m128(_mm_castsi128_ps(a: _mm_cmpeq_epi32(a: _mm_setzero_si128(), b: _mm_setzero_si128()))) {} |
51 | |
52 | //////////////////////////////////////////////////////////////////////////////// |
53 | /// Array Access |
54 | //////////////////////////////////////////////////////////////////////////////// |
55 | |
56 | __forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; } |
57 | __forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; } |
58 | }; |
59 | |
60 | |
61 | //////////////////////////////////////////////////////////////////////////////// |
62 | /// Unary Operators |
63 | //////////////////////////////////////////////////////////////////////////////// |
64 | |
65 | __forceinline Vec3ba operator !( const Vec3ba& a ) { return _mm_xor_ps(a: a.m128, b: Vec3ba(embree::True)); } |
66 | |
67 | //////////////////////////////////////////////////////////////////////////////// |
68 | /// Binary Operators |
69 | //////////////////////////////////////////////////////////////////////////////// |
70 | |
71 | __forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return _mm_and_ps(a: a.m128, b: b.m128); } |
72 | __forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return _mm_or_ps (a: a.m128, b: b.m128); } |
73 | __forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return _mm_xor_ps(a: a.m128, b: b.m128); } |
74 | |
75 | //////////////////////////////////////////////////////////////////////////////// |
76 | /// Assignment Operators |
77 | //////////////////////////////////////////////////////////////////////////////// |
78 | |
79 | __forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; } |
80 | __forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; } |
81 | __forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; } |
82 | |
83 | //////////////////////////////////////////////////////////////////////////////// |
84 | /// Comparison Operators + Select |
85 | //////////////////////////////////////////////////////////////////////////////// |
86 | |
87 | __forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) { |
88 | return (_mm_movemask_ps(a: _mm_castsi128_ps(a: _mm_cmpeq_epi32(a: _mm_castps_si128(a: a.m128), b: _mm_castps_si128(a: b.m128)))) & 7) == 7; |
89 | } |
90 | __forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) { |
91 | return (_mm_movemask_ps(a: _mm_castsi128_ps(a: _mm_cmpeq_epi32(a: _mm_castps_si128(a: a.m128), b: _mm_castps_si128(a: b.m128)))) & 7) != 7; |
92 | } |
93 | __forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) { |
94 | if (a.x != b.x) return a.x < b.x; |
95 | if (a.y != b.y) return a.y < b.y; |
96 | if (a.z != b.z) return a.z < b.z; |
97 | return false; |
98 | } |
99 | |
100 | //////////////////////////////////////////////////////////////////////////////// |
101 | /// Reduction Operations |
102 | //////////////////////////////////////////////////////////////////////////////// |
103 | |
104 | __forceinline bool reduce_and( const Vec3ba& a ) { return (_mm_movemask_ps(a: a) & 0x7) == 0x7; } |
105 | __forceinline bool reduce_or ( const Vec3ba& a ) { return (_mm_movemask_ps(a: a) & 0x7) != 0x0; } |
106 | |
107 | __forceinline bool all ( const Vec3ba& b ) { return (_mm_movemask_ps(a: b) & 0x7) == 0x7; } |
108 | __forceinline bool any ( const Vec3ba& b ) { return (_mm_movemask_ps(a: b) & 0x7) != 0x0; } |
109 | __forceinline bool none ( const Vec3ba& b ) { return (_mm_movemask_ps(a: b) & 0x7) == 0x0; } |
110 | |
111 | __forceinline size_t movemask(const Vec3ba& a) { return _mm_movemask_ps(a: a) & 0x7; } |
112 | |
113 | //////////////////////////////////////////////////////////////////////////////// |
114 | /// Output Operators |
115 | //////////////////////////////////////////////////////////////////////////////// |
116 | |
117 | __forceinline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) { |
118 | return cout << "(" << (a.x ? "1" : "0" ) << ", " << (a.y ? "1" : "0" ) << ", " << (a.z ? "1" : "0" ) << ")" ; |
119 | } |
120 | } |
121 | |