| 1 | // Copyright 2009-2021 Intel Corporation | 
| 2 | // SPDX-License-Identifier: Apache-2.0 | 
| 3 |  | 
| 4 | #include "bvh_rotate.h" | 
| 5 |  | 
| 6 | namespace embree | 
| 7 | { | 
| 8 |   namespace isa  | 
| 9 |   { | 
| 10 |     /*! Computes half surface area of box. */ | 
| 11 |     __forceinline float halfArea3f(const BBox<vfloat4>& box) { | 
| 12 |       const vfloat4 d = box.size(); | 
| 13 |       const vfloat4 a = d*shuffle<1,2,0,3>(v: d); | 
| 14 |       return a[0]+a[1]+a[2]; | 
| 15 |     } | 
| 16 |      | 
| 17 |     size_t BVHNRotate<4>::rotate(NodeRef parentRef, size_t depth) | 
| 18 |     { | 
| 19 |       /*! nothing to rotate if we reached a leaf node. */ | 
| 20 |       if (parentRef.isBarrier()) return 0; | 
| 21 |       if (parentRef.isLeaf()) return 0; | 
| 22 |       AABBNode* parent = parentRef.getAABBNode(); | 
| 23 |        | 
| 24 |       /*! rotate all children first */ | 
| 25 |       vint4 cdepth; | 
| 26 |       for (size_t c=0; c<4; c++) | 
| 27 | 	cdepth[c] = (int)rotate(parentRef: parent->child(i: c),depth: depth+1); | 
| 28 |        | 
| 29 |       /* compute current areas of all children */ | 
| 30 |       vfloat4 sizeX = parent->upper_x-parent->lower_x; | 
| 31 |       vfloat4 sizeY = parent->upper_y-parent->lower_y; | 
| 32 |       vfloat4 sizeZ = parent->upper_z-parent->lower_z; | 
| 33 |       vfloat4 childArea = madd(a: sizeX,b: (sizeY + sizeZ),c: sizeY*sizeZ); | 
| 34 |        | 
| 35 |       /*! get node bounds */ | 
| 36 |       BBox<vfloat4> child1_0,child1_1,child1_2,child1_3; | 
| 37 |       parent->bounds(bounds0&: child1_0,bounds1&: child1_1,bounds2&: child1_2,bounds3&: child1_3); | 
| 38 |        | 
| 39 |       /*! Find best rotation. We pick a first child (child1) and a sub-child  | 
| 40 | 	(child2child) of a different second child (child2), and swap child1  | 
| 41 | 	and child2child. We perform the best such swap. */ | 
| 42 |       float bestArea = 0; | 
| 43 |       size_t bestChild1 = -1, bestChild2 = -1, bestChild2Child = -1; | 
| 44 |       for (size_t c2=0; c2<4; c2++) | 
| 45 |       { | 
| 46 | 	/*! ignore leaf nodes as we cannot descent into them */ | 
| 47 | 	if (parent->child(i: c2).isBarrier()) continue; | 
| 48 | 	if (parent->child(i: c2).isLeaf()) continue; | 
| 49 | 	AABBNode* child2 = parent->child(i: c2).getAABBNode(); | 
| 50 | 	 | 
| 51 | 	/*! transpose child bounds */ | 
| 52 | 	BBox<vfloat4> child2c0,child2c1,child2c2,child2c3; | 
| 53 | 	child2->bounds(bounds0&: child2c0,bounds1&: child2c1,bounds2&: child2c2,bounds3&: child2c3); | 
| 54 | 	 | 
| 55 | 	/*! put child1_0 at each child2 position */ | 
| 56 | 	float cost00 = halfArea3f(box: merge(a: child1_0,b: child2c1,c: child2c2,d: child2c3)); | 
| 57 | 	float cost01 = halfArea3f(box: merge(a: child2c0,b: child1_0,c: child2c2,d: child2c3)); | 
| 58 | 	float cost02 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child1_0,d: child2c3)); | 
| 59 | 	float cost03 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child2c2,d: child1_0)); | 
| 60 | 	vfloat4 cost0 = vfloat4(cost00,cost01,cost02,cost03); | 
| 61 | 	vfloat4 min0 = vreduce_min(v: cost0); | 
| 62 | 	int pos0 = (int)bsf(v: movemask(a: min0 == cost0)); | 
| 63 | 	 | 
| 64 | 	/*! put child1_1 at each child2 position */ | 
| 65 | 	float cost10 = halfArea3f(box: merge(a: child1_1,b: child2c1,c: child2c2,d: child2c3)); | 
| 66 | 	float cost11 = halfArea3f(box: merge(a: child2c0,b: child1_1,c: child2c2,d: child2c3)); | 
| 67 | 	float cost12 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child1_1,d: child2c3)); | 
| 68 | 	float cost13 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child2c2,d: child1_1)); | 
| 69 | 	vfloat4 cost1 = vfloat4(cost10,cost11,cost12,cost13); | 
| 70 | 	vfloat4 min1 = vreduce_min(v: cost1); | 
| 71 | 	int pos1 = (int)bsf(v: movemask(a: min1 == cost1)); | 
| 72 | 	 | 
| 73 | 	/*! put child1_2 at each child2 position */ | 
| 74 | 	float cost20 = halfArea3f(box: merge(a: child1_2,b: child2c1,c: child2c2,d: child2c3)); | 
| 75 | 	float cost21 = halfArea3f(box: merge(a: child2c0,b: child1_2,c: child2c2,d: child2c3)); | 
| 76 | 	float cost22 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child1_2,d: child2c3)); | 
| 77 | 	float cost23 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child2c2,d: child1_2)); | 
| 78 | 	vfloat4 cost2 = vfloat4(cost20,cost21,cost22,cost23); | 
| 79 | 	vfloat4 min2 = vreduce_min(v: cost2); | 
| 80 | 	int pos2 = (int)bsf(v: movemask(a: min2 == cost2)); | 
| 81 | 	 | 
| 82 | 	/*! put child1_3 at each child2 position */ | 
| 83 | 	float cost30 = halfArea3f(box: merge(a: child1_3,b: child2c1,c: child2c2,d: child2c3)); | 
| 84 | 	float cost31 = halfArea3f(box: merge(a: child2c0,b: child1_3,c: child2c2,d: child2c3)); | 
| 85 | 	float cost32 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child1_3,d: child2c3)); | 
| 86 | 	float cost33 = halfArea3f(box: merge(a: child2c0,b: child2c1,c: child2c2,d: child1_3)); | 
| 87 | 	vfloat4 cost3 = vfloat4(cost30,cost31,cost32,cost33); | 
| 88 | 	vfloat4 min3 = vreduce_min(v: cost3); | 
| 89 | 	int pos3 = (int)bsf(v: movemask(a: min3 == cost3)); | 
| 90 | 	 | 
| 91 | 	/*! find best other child */ | 
| 92 | 	vfloat4 area0123 = vfloat4(extract<0>(a: min0),extract<0>(a: min1),extract<0>(a: min2),extract<0>(a: min3)) - vfloat4(childArea[c2]); | 
| 93 | 	int pos[4] = { pos0,pos1,pos2,pos3 }; | 
| 94 | 	const size_t mbd = BVH4::maxBuildDepth; | 
| 95 | 	vbool4 valid = vint4(int(depth+1))+cdepth <= vint4(mbd); // only select swaps that fulfill depth constraints | 
| 96 | 	valid &= vint4(int(c2)) != vint4(step); | 
| 97 | 	if (none(b: valid)) continue; | 
| 98 | 	size_t c1 = select_min(valid,v: area0123); | 
| 99 | 	float area = area0123[c1];  | 
| 100 |         if (c1 == c2) continue; // can happen if bounds are NANs | 
| 101 | 	 | 
| 102 | 	/*! accept a swap when it reduces cost and is not swapping a node with itself */ | 
| 103 | 	if (area < bestArea) { | 
| 104 | 	  bestArea = area; | 
| 105 | 	  bestChild1 = c1; | 
| 106 | 	  bestChild2 = c2; | 
| 107 | 	  bestChild2Child = pos[c1]; | 
| 108 | 	} | 
| 109 |       } | 
| 110 |        | 
| 111 |       /*! if we did not find a swap that improves the SAH then do nothing */ | 
| 112 |       if (bestChild1 == size_t(-1)) return 1+reduce_max(v: cdepth); | 
| 113 |        | 
| 114 |       /*! perform the best found tree rotation */ | 
| 115 |       AABBNode* child2 = parent->child(i: bestChild2).getAABBNode(); | 
| 116 |       AABBNode::swap(a: parent,i: bestChild1,b: child2,j: bestChild2Child); | 
| 117 |       parent->setBounds(i: bestChild2,bounds: child2->bounds()); | 
| 118 |       AABBNode::compact(a: parent); | 
| 119 |       AABBNode::compact(a: child2); | 
| 120 |        | 
| 121 |       /*! This returned depth is conservative as the child that was | 
| 122 |        *  pulled up in the tree could have been on the critical path. */ | 
| 123 |       cdepth[bestChild1]++; // bestChild1 was pushed down one level | 
| 124 |       return 1+reduce_max(v: cdepth);  | 
| 125 |     } | 
| 126 |   } | 
| 127 | } | 
| 128 |  |