| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #pragma once | 
| 5 |  | 
| 6 | #include "../math/math.h" | 
| 7 |  | 
| 8 | /* include SSE wrapper classes */ | 
| 9 | #if defined(__SSE__) | 
| 10 | #  include "sse.h" | 
| 11 | #endif | 
| 12 |  | 
| 13 | /* include AVX wrapper classes */ | 
| 14 | #if defined(__AVX__) | 
| 15 | #  include "avx.h" | 
| 16 | #endif | 
| 17 |  | 
| 18 | /* include AVX512 wrapper classes */ | 
| 19 | #if defined (__AVX512F__) | 
| 20 | #  include "avx512.h" | 
| 21 | #endif | 
| 22 |  | 
| 23 | namespace embree | 
| 24 | { | 
| 25 |   template <int N> | 
| 26 |   __forceinline vbool<N> isfinite(const vfloat<N>& v) | 
| 27 |   { | 
| 28 |     return (v >= vfloat<N>(-std::numeric_limits<float>::max())) | 
| 29 |          & (v <= vfloat<N>( std::numeric_limits<float>::max())); | 
| 30 |   } | 
| 31 |    | 
| 32 |   /* foreach unique */ | 
| 33 |   template<typename vbool, typename vint, typename Closure> | 
| 34 |   __forceinline void foreach_unique(const vbool& valid0, const vint& vi, const Closure& closure) | 
| 35 |   { | 
| 36 |     vbool valid1 = valid0; | 
| 37 |     while (any(valid1)) { | 
| 38 |       const int j = int(bsf(movemask(valid1))); | 
| 39 |       const int i = vi[j]; | 
| 40 |       const vbool valid2 = valid1 & (i == vi); | 
| 41 |       valid1 = andn(valid1, valid2); | 
| 42 |       closure(valid2, i); | 
| 43 |     } | 
| 44 |   } | 
| 45 |  | 
| 46 |   /* returns the next unique value i in vi and the corresponding valid_i mask */ | 
| 47 |   template<typename vbool, typename vint> | 
| 48 |   __forceinline int next_unique(vbool& valid, const vint& vi, /*out*/ vbool& valid_i) | 
| 49 |   { | 
| 50 |     assert(any(valid)); | 
| 51 |     const int j = int(bsf(movemask(valid))); | 
| 52 |     const int i = vi[j]; | 
| 53 |     valid_i = valid & (i == vi); | 
| 54 |     valid = andn(valid, valid_i); | 
| 55 |     return i; | 
| 56 |   } | 
| 57 |  | 
| 58 |   /* foreach unique index */ | 
| 59 |   template<typename vbool, typename vint, typename Closure> | 
| 60 |   __forceinline void foreach_unique_index(const vbool& valid0, const vint& vi, const Closure& closure) | 
| 61 |   { | 
| 62 |     vbool valid1 = valid0; | 
| 63 |     while (any(valid1)) { | 
| 64 |       const int j = int(bsf(movemask(valid1))); | 
| 65 |       const int i = vi[j]; | 
| 66 |       const vbool valid2 = valid1 & (i == vi); | 
| 67 |       valid1 = andn(valid1, valid2); | 
| 68 |       closure(valid2, i, j); | 
| 69 |     } | 
| 70 |   } | 
| 71 |  | 
| 72 |   /* returns the index of the next unique value i in vi and the corresponding valid_i mask */ | 
| 73 |   template<typename vbool, typename vint> | 
| 74 |   __forceinline int next_unique_index(vbool& valid, const vint& vi, /*out*/ vbool& valid_i) | 
| 75 |   { | 
| 76 |     assert(any(valid)); | 
| 77 |     const int j = int(bsf(movemask(valid))); | 
| 78 |     const int i = vi[j]; | 
| 79 |     valid_i = valid & (i == vi); | 
| 80 |     valid = andn(valid, valid_i); | 
| 81 |     return j; | 
| 82 |   } | 
| 83 |  | 
| 84 |   template<typename Closure> | 
| 85 |   __forceinline void foreach2(int x0, int x1, int y0, int y1, const Closure& closure) | 
| 86 |   { | 
| 87 |     __aligned(64) int U[2*VSIZEX]; | 
| 88 |     __aligned(64) int V[2*VSIZEX]; | 
| 89 |     int index = 0; | 
| 90 |     for (int y=y0; y<y1; y++) { | 
| 91 |       const bool lasty = y+1>=y1; | 
| 92 |       const vintx vy = y; | 
| 93 |       for (int x=x0; x<x1; ) { //x+=VSIZEX) { | 
| 94 |         const bool lastx = x+VSIZEX >= x1; | 
| 95 |         vintx vx = x+vintx(step); | 
| 96 |         vintx::storeu(ptr: &U[index], v: vx); | 
| 97 |         vintx::storeu(ptr: &V[index], v: vy); | 
| 98 |         const int dx = min(a: x1-x,b: VSIZEX); | 
| 99 |         index += dx; | 
| 100 |         x += dx; | 
| 101 |         if (index >= VSIZEX || (lastx && lasty)) { | 
| 102 |           const vboolx valid = vintx(step) < vintx(index); | 
| 103 |           closure(valid, vintx::load(a: U), vintx::load(a: V)); | 
| 104 |           x-= max(a: 0, b: index-VSIZEX); | 
| 105 |           index = 0; | 
| 106 |         } | 
| 107 |       } | 
| 108 |     } | 
| 109 |   } | 
| 110 | } | 
| 111 |  |