How can I speed up my SAT test?

Please don't post Bullet support questions here, use the above forums instead.
cairno
Posts: 1
Joined: Thu Nov 05, 2020 7:22 pm

How can I speed up my SAT test?

Post by cairno »

Howdy folks,

I have an implementation of the SAT algorithm as described by Dirk's GDC talk and I'm trying to improve the performance. I'm only doing discrete collision detection. This is how my collision detection currently works:
1. Perform broadphase
2. Transform all potentially colliding convex polyhedra verts/normals into world space
3. Perform SAT on all pairs found by broadphase and regenerate all contact points

I don't do any caching of separating axes, I found it slower than just brute force retesting surprisingly. Currently the slowest part of my SAT implementation is the edge-edge tests.
For my implementation, I basically just dumbly converted Dirk's algorithm to SIMD and it seems to be around 3-4x faster than a scalar implementation.
First I just find all potential edge pairs and then iterate over the pairs checking for overlap or separation.
Finding the pairs tends to takes around 50-70% of the time. I tried also computing edge distance in the inner loop but it's maybe 30% slower than computing edge distances after pruning.

Code: Select all

// From Daniel Lemire's simd prune library
static inline __m128i prune_epi32(__m128i x, int mask) 
{
  return _mm_shuffle_epi8(x, _mm_loadu_si128((const __m128i *)mask128_epi32 + mask));
}

struct TransformedConvexShapeData
{
    int num_edges;

    float *edge_tangents_x;
    float *edge_tangents_y;
    float *edge_tangents_z;

    float *edge_normals_L_x;
    float *edge_normals_L_y;
    float *edge_normals_L_z;

    float *edge_normals_R_x;
    float *edge_normals_R_y;
    float *edge_normals_R_z;

    float *edge_verts_x;
    float *edge_verts_y;
    float *edge_verts_z;
};

bool test_edge_axes_sse(const TransformedConvexShapeData &a, const TransformedConvexShapeData &b, TestEdgeAxesRes &res)
{
    int edges_a[MAX_EDGE_PAIRS];
    int edges_b[MAX_EDGE_PAIRS];
    int num_pairs = 0;

    __m128i index_a = _mm_set_epi32(3,2,1,0);
    __m128i max_edges_a = _mm_set1_epi32(a.num_edges);
    for(int i = 0; i < a.num_edges; i += 4)
    {
        // Load normals for the 2 faces connected to the edge. 
        // These normals are the arc endpoints on the unit sphere.
        __m128 Ax = _mm_loadu_ps(a.edge_normals_L_x+i);
        __m128 Ay = _mm_loadu_ps(a.edge_normals_L_y+i);
        __m128 Az = _mm_loadu_ps(a.edge_normals_L_z+i);
        __m128 Bx = _mm_loadu_ps(a.edge_normals_R_x+i);
        __m128 By = _mm_loadu_ps(a.edge_normals_R_y+i);
        __m128 Bz = _mm_loadu_ps(a.edge_normals_R_z+i);

        // Load the edge's direction which is normal to the plane through points A and B
        __m128 B_x_Ax = _mm_loadu_ps(a.edge_tangents_x+i);
        __m128 B_x_Ay = _mm_loadu_ps(a.edge_tangents_y+i);
        __m128 B_x_Az = _mm_loadu_ps(a.edge_tangents_z+i);

        // This will mask out lanes which are out of bounds.
        __m128i out_of_bounds = _mm_xor_si128(_mm_cmplt_epi32(index_a, max_edges_a), _mm_set1_epi32(~0));
        __m128i index_b = _mm_set1_epi32(0);
        for (int j = 0; j < b.num_edges; j++)
        {
            // Load normals as in the outer loop, we need to negate these since we're testing a minkowski _difference_
            __m128 Cx = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_L_x+j));
            __m128 Cy = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_L_y+j));
            __m128 Cz = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_L_z+j));
            __m128 Dx = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_R_x+j));
            __m128 Dy = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_R_y+j));
            __m128 Dz = _mm_xor_ps(_mm_set1_ps(-0.), _mm_broadcast_ss(b.edge_normals_R_z+j));

            // Same as outer loop.
            __m128 D_x_Cx = _mm_broadcast_ss(b.edge_tangents_x+j);
            __m128 D_x_Cy = _mm_broadcast_ss(b.edge_tangents_y+j);
            __m128 D_x_Cz = _mm_broadcast_ss(b.edge_tangents_z+j);

            // Two edges form a face on the MD if [C,B,A]*[D,B,A] < 0 && [A,D,C]*[B,D,C] < 0 && [C,B,A]*[B,D,C] > 0 (brackets denote triple product)
            //    - [C,B,A]*[D,B,A] < 0 Implies the points C and D lie on different sides of the plane A,B and the origin.
            //    - [A,D,C]*[B,D,C] < 0 Implies the points A and B lie on different sides of the plane through C,D and the origin.
            //    - [C,B,A]*[B,D,C] > 0 We need to test if the arcs AB and CD are in the same hemisphere. This is true if A and D lie on the same side of the plane through C,D and the origin. 
            //                          This can be tested with 0 < [A,C,B]*[D,C,B] == [C,B,A]*[B,D,C] since the triple product is invariant under cyclic permutations.
            __m128 CBA = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Cx,B_x_Ax), _mm_mul_ps(Cy,B_x_Ay)), _mm_mul_ps(Cz,B_x_Az));
            __m128 DBA = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Dx,B_x_Ax), _mm_mul_ps(Dy,B_x_Ay)), _mm_mul_ps(Dz,B_x_Az));
            __m128 ADC = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Ax,D_x_Cx), _mm_mul_ps(Ay,D_x_Cy)), _mm_mul_ps(Az,D_x_Cz));
            __m128 BDC = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Bx,D_x_Cx), _mm_mul_ps(By,D_x_Cy)), _mm_mul_ps(Bz,D_x_Cz));
            __m128 CBA_DBA = _mm_mul_ps(CBA, DBA);
            __m128 ADC_BDC = _mm_mul_ps(ADC, BDC);
            __m128 CBA_BDC = _mm_mul_ps(CBA, BDC);
            __m128 not_mf = _mm_or_ps(_mm_or_ps(_mm_cmpge_ps(CBA_DBA,_mm_set1_ps(0)), _mm_cmpge_ps(ADC_BDC,_mm_set1_ps(0))), _mm_cmple_ps(CBA_BDC,_mm_set1_ps(0)));

            // We store only the pairs which form a minkowski face
            __m128 prune_mask = _mm_or_ps(_mm_castsi128_ps(out_of_bounds), not_mf);
            int move_mask = _mm_movemask_ps(prune_mask);
            __m128i store_index_a = prune_epi32(index_a,move_mask);
            __m128i store_index_b = prune_epi32(index_b,move_mask);
            _mm_storeu_si128((__m128i*)(edges_a+num_pairs),store_index_a);
            _mm_storeu_si128((__m128i*)(edges_b+num_pairs),store_index_b);
            num_pairs += 4-__popcnt(move_mask);


            index_b = _mm_add_epi32(index_b,_mm_set1_epi32(1));
        }

        index_a = _mm_add_epi32(index_a, _mm_set1_epi32(4));
    }

    // Naively Iterate over all potential pairs and find contact points/early out/etc.

}
irlanrobson
Posts: 16
Joined: Sun Feb 08, 2015 3:21 pm
Location: Brazil

Re: How can I speed up my SAT test?

Post by irlanrobson »

You don't need to regenerate all contact points.

I either write a separation or overlap cache and check this in the next frame.

Then if the shapes are overlapping in the previous and current frame rebuild the contact. In the case of a face overlap check if the relative orientation of the bodies has changed significantly. If the relative orientation hasn't changed then rebuild a face contact using clipping. Otherwise re-run SAT. For edge overlaps check if the closest points are lying on the opposite segments.

If the shapes are separated again then we hit the cache!

Then you need to handle some cases of course.

Note that this is not only a optimization but also a way to improve convergence if you are warm starting the solver!

Code: Select all


/*
* Copyright (c) 2016-2019 Irlan Robson 
*
* This software is provided 'as-is', without any express or implied
* warranty.  In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/

#include <bounce/dynamics/contacts/collide/collide.h>
#include <bounce/dynamics/contacts/collide/clip.h>
#include <bounce/dynamics/contacts/manifold.h>
#include <bounce/dynamics/contacts/contact_cluster.h>
#include <bounce/dynamics/shapes/hull_shape.h>
#include <bounce/dynamics/body.h>
#include <bounce/collision/shapes/hull.h>

bool b3_convexCache = true;
u32 b3_convexCalls = 0, b3_convexCacheHits = 0;

static void b3BuildEdgeContact(b3Manifold& manifold,
	const b3Transform& xf1, u32 index1, const b3HullShape* s1,
	const b3Transform& xf2, u32 index2, const b3HullShape* s2)
{
	const b3Hull* hull1 = s1->m_hull;
	const b3HalfEdge* edge1 = hull1->GetEdge(index1);
	const b3HalfEdge* twin1 = hull1->GetEdge(index1 + 1);

	b3Vec3 C1 = xf1 * hull1->centroid;
	b3Vec3 P1 = xf1 * hull1->GetVertex(edge1->origin);
	b3Vec3 Q1 = xf1 * hull1->GetVertex(twin1->origin);
	b3Vec3 E1 = Q1 - P1;
	b3Vec3 N1 = E1;
	scalar L1 = N1.Normalize();
	B3_ASSERT(L1 > B3_LINEAR_SLOP);

	const b3Hull* hull2 = s2->m_hull;
	const b3HalfEdge* edge2 = hull2->GetEdge(index2);
	const b3HalfEdge* twin2 = hull2->GetEdge(index2 + 1);

	b3Vec3 C2 = xf2 * hull2->centroid;
	b3Vec3 P2 = xf2 * hull2->GetVertex(edge2->origin);
	b3Vec3 Q2 = xf2 * hull2->GetVertex(twin2->origin);
	b3Vec3 E2 = Q2 - P2;
	b3Vec3 N2 = E2;
	scalar L2 = N2.Normalize();
	B3_ASSERT(L2 > B3_LINEAR_SLOP);

	// Compute the closest points on the two lines.
	scalar b = b3Dot(N1, N2);
	scalar den = scalar(1) - b * b;
	if (den == scalar(0))
	{
		return;
	}

	scalar inv_den = scalar(1) / den;

	b3Vec3 E3 = P1 - P2;

	scalar d = b3Dot(N1, E3);
	scalar e = b3Dot(N2, E3);

	scalar s = inv_den * (b * e - d);
	scalar t = inv_den * (e - b * d);

	b3Vec3 c1 = P1 + s * N1;
	b3Vec3 c2 = P2 + t * N2;

	// Ensure normal orientation to hull 2.
	b3Vec3 N = b3Cross(E1, E2);
	scalar LN = N.Normalize();
	B3_ASSERT(LN > scalar(0));
	if (b3Dot(N, P1 - C1) < scalar(0))
	{
		N = -N;
	}

	b3FeaturePair pair = b3MakePair(index1, index1 + 1, index2, index2 + 1);

	manifold.pointCount = 1;
	manifold.points[0].localNormal1 = b3MulC(xf1.rotation, N);
	manifold.points[0].localPoint1 = b3MulT(xf1, c1);
	manifold.points[0].localPoint2 = b3MulT(xf2, c2);
	manifold.points[0].key = b3MakeKey(pair);
	manifold.points[0].featurePair = pair;
	manifold.points[0].edgeContact = true;
}

static void b3BuildFaceContact(b3Manifold& manifold,
	const b3Transform& xf1, u32 index1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2,
	bool flipNormal)
{
	const b3Hull* hull1 = s1->m_hull;
	scalar r1 = s1->m_radius;

	const b3Hull* hull2 = s2->m_hull;
	scalar r2 = s2->m_radius;

	scalar totalRadius = r1 + r2;

	// 1. Define the reference face plane (1).
	const b3Face* face1 = hull1->GetFace(index1);
	const b3HalfEdge* edge1 = hull1->GetEdge(face1->edge);
	b3Plane localPlane1 = hull1->GetPlane(index1);
	b3Vec3 localNormal1 = localPlane1.normal;
	b3Vec3 localPoint1 = hull1->GetVertex(edge1->origin);
	b3Plane plane1 = b3Mul(xf1, localPlane1);

	// 2. Find the incident face polygon (2).	

	// Put the reference plane normal in the frame of the incident hull (2).
	b3Vec3 normal1 = b3MulC(xf2.rotation, plane1.normal);

	// Find the support face polygon in the *negated* direction.
	b3StackArray<b3ClipVertex, 32> polygon2;
	u32 index2 = hull2->GetSupportFace(-normal1);
	b3BuildPolygon(polygon2, xf2, index2, hull2);

	// 3. Clip incident face polygon (2) against the reference face (1) side planes.
	b3StackArray<b3ClipVertex, 32> clipPolygon2;
	b3ClipPolygonToFace(clipPolygon2, polygon2, xf1, totalRadius, index1, hull1);
	if (clipPolygon2.IsEmpty())
	{
		return;
	}

	// 4. Project the clipped polygon on the reference plane for reduction.
	// Ensure the deepest point is contained in the reduced polygon.
	b3StackArray<b3ClusterPolygonVertex, 32> polygon1;

	u32 minIndex = 0;
	scalar minSeparation = B3_MAX_SCALAR;

	for (u32 i = 0; i < clipPolygon2.Count(); ++i)
	{
		b3ClipVertex v2 = clipPolygon2[i];
		scalar separation = b3Distance(v2.position, plane1);

		if (separation <= totalRadius)
		{
			if (separation < minSeparation)
			{
				minIndex = polygon1.Count();
				minSeparation = separation;
			}

			b3ClusterPolygonVertex v1;
			v1.position = b3ClosestPointOnPlane(v2.position, plane1);
			v1.clipIndex = i;
			polygon1.PushBack(v1);
		}
	}

	if (polygon1.IsEmpty())
	{
		return;
	}

	// 5. Reduce.
	b3Vec3 normal = plane1.normal;

	// Ensure normal orientation to hull 2.
	b3Vec3 s_normal = flipNormal ? -normal : normal;

	b3StackArray<b3ClusterPolygonVertex, 32> reducedPolygon1;
	b3ReducePolygon(reducedPolygon1, polygon1, s_normal, minIndex);
	B3_ASSERT(!reducedPolygon1.IsEmpty());

	// 6. Build face contact.
	u32 pointCount = reducedPolygon1.Count();
	for (u32 i = 0; i < pointCount; ++i)
	{
		u32 clipIndex = reducedPolygon1[i].clipIndex;
		b3ClipVertex v2 = clipPolygon2[clipIndex];
		b3Vec3 v1 = b3ClosestPointOnPlane(v2.position, plane1);

		b3ManifoldPoint* mp = manifold.points + i;

		if (flipNormal)
		{
			// Swap the feature pairs.
			b3FeaturePair pair = b3MakePair(v2.pair.inEdge2, v2.pair.outEdge2, v2.pair.inEdge1, v2.pair.outEdge1);

			mp->localNormal1 = b3MulC(xf2.rotation, s_normal);
			mp->localPoint1 = b3MulT(xf2, v2.position);
			mp->localPoint2 = b3MulT(xf1, v1);
			mp->key = b3MakeKey(pair);
			mp->featurePair = pair;
			mp->edgeContact = false;
		}
		else
		{
			mp->localNormal1 = b3MulC(xf1.rotation, normal);
			mp->localPoint1 = b3MulT(xf1, v1);
			mp->localPoint2 = b3MulT(xf2, v2.position);
			mp->key = b3MakeKey(v2.pair);
			mp->featurePair = v2.pair;
			mp->edgeContact = false;
		}
	}

	manifold.pointCount = pointCount;
}

static void b3CollideHulls(b3Manifold& manifold,
	const b3Transform& xf1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2)
{
	B3_ASSERT(manifold.pointCount == 0);

	const b3Hull* hull1 = s1->m_hull;
	scalar r1 = s1->m_radius;

	const b3Hull* hull2 = s2->m_hull;
	scalar r2 = s2->m_radius;

	scalar totalRadius = r1 + r2;

	b3FaceQuery faceQuery1 = b3QueryFaceSeparation(xf1, hull1, xf2, hull2);
	if (faceQuery1.separation > totalRadius)
	{
		return;
	}

	b3FaceQuery faceQuery2 = b3QueryFaceSeparation(xf2, hull2, xf1, hull1);
	if (faceQuery2.separation > totalRadius)
	{
		return;
	}

	b3EdgeQuery edgeQuery = b3QueryEdgeSeparation(xf1, hull1, xf2, hull2);
	if (edgeQuery.separation > totalRadius)
	{
		return;
	}

	const scalar kTol = scalar(0.1) * B3_LINEAR_SLOP;
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation) + kTol)
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
	}
	else
	{
		if (faceQuery1.separation + kTol > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
		}
	}

	// Heuristic succeded. 
	if (manifold.pointCount > 0)
	{
		return;
	}

	// Heuristic failed. Fallback.
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation))
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
	}
	else
	{
		if (faceQuery1.separation > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
		}
	}

	// When both convex hulls are not simplified clipping might fail and create no contact points.
	// For example, when a hull contains tiny faces, coplanar faces, and/or non-sharped edges.
	// So we need to use an heuristic to create a correct contact point.
}

///////////////////////////////////////////////////////////////////////////////////////////////////

static void b3RebuildEdgeContact(b3Manifold& manifold,
	const b3Transform& xf1, u32 index1, const b3HullShape* s1,
	const b3Transform& xf2, u32 index2, const b3HullShape* s2)
{
	const b3Hull* hull1 = s1->m_hull;
	const b3HalfEdge* edge1 = hull1->GetEdge(index1);
	const b3HalfEdge* twin1 = hull1->GetEdge(index1 + 1);

	b3Vec3 C1 = xf1 * hull1->centroid;
	b3Vec3 P1 = xf1 * hull1->GetVertex(edge1->origin);
	b3Vec3 Q1 = xf1 * hull1->GetVertex(twin1->origin);
	b3Vec3 E1 = Q1 - P1;
	b3Vec3 N1 = E1;
	scalar L1 = N1.Normalize();
	B3_ASSERT(L1 > scalar(0));

	const b3Hull* hull2 = s2->m_hull;
	const b3HalfEdge* edge2 = hull2->GetEdge(index2);
	const b3HalfEdge* twin2 = hull2->GetEdge(index2 + 1);

	b3Vec3 C2 = xf2 * hull2->centroid;
	b3Vec3 P2 = xf2 * hull2->GetVertex(edge2->origin);
	b3Vec3 Q2 = xf2 * hull2->GetVertex(twin2->origin);
	b3Vec3 E2 = Q2 - P2;
	b3Vec3 N2 = E2;
	scalar L2 = N2.Normalize();
	B3_ASSERT(L2 > scalar(0));

	// Compute the closest points on the two lines.
	scalar b = b3Dot(N1, N2);
	scalar den = scalar(1) - b * b;
	if (den == scalar(0))
	{
		return;
	}

	scalar inv_den = scalar(1) / den;

	b3Vec3 E3 = P1 - P2;

	scalar d = b3Dot(N1, E3);
	scalar e = b3Dot(N2, E3);

	scalar s = inv_den * (b * e - d);
	scalar t = inv_den * (e - b * d);

	b3Vec3 c1 = P1 + s * N1;
	b3Vec3 c2 = P2 + t * N2;

	// Check if the closest points are still lying on the opposite segments 
	// using Barycentric coordinates.
	scalar w2[3];
	b3BarycentricCoordinates(w2, P1, Q1, c2);

	scalar w1[3];
	b3BarycentricCoordinates(w1, P2, Q2, c1);

	if (w2[1] > scalar(0) && w2[1] <= w2[2] &&
		w1[1] > scalar(0) && w1[1] <= w1[2])
	{
		b3Vec3 N = b3Cross(E1, E2);
		scalar LN = N.Normalize();
		B3_ASSERT(LN > scalar(0));
		if (b3Dot(N, P1 - C1) < scalar(0))
		{
			N = -N;
		}

		b3FeaturePair pair = b3MakePair(index1, index1 + 1, index2, index2 + 1);

		manifold.pointCount = 1;
		manifold.points[0].localNormal1 = b3MulC(xf1.rotation, N);
		manifold.points[0].localPoint1 = b3MulT(xf1, c1);
		manifold.points[0].localPoint2 = b3MulT(xf2, c2);
		manifold.points[0].key = b3MakeKey(pair);
		manifold.points[0].featurePair = pair;
		manifold.points[0].edgeContact = true;
	}
}

static void b3RebuildFaceContact(b3Manifold& manifold,
	const b3Transform& xf1, u32 index1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2, bool flipNormal)
{
	const b3Body* body1 = s1->GetBody();
	const b3Body* body2 = s2->GetBody();

	const b3Sweep& sweep1 = body1->GetSweep();
	b3Quat q10 = sweep1.orientation0;
	b3Quat q1 = sweep1.orientation;

	const b3Sweep& sweep2 = body2->GetSweep();
	b3Quat q20 = sweep2.orientation0;
	b3Quat q2 = sweep2.orientation;

	// Check if the relative orientation has changed.
	// Here the second orientation seen by the first orientation.
	// dp = p2 - p1
	// dq * q1 = q2
	// dq = inv(q1) * q2

	// The old relative rotation.
	// "q0(2) - q0(1)"
	b3Quat dq0 = b3Conjugate(q10) * q20;

	// The new relative rotation.
	// "q(2) - q(1)"
	b3Quat dq = b3Conjugate(q1) * q2;

	// Relative rotation between the new relative rotation and the old relative rotation.
	// "dq(2) - dq0(1)"
	b3Quat q = b3Conjugate(dq0) * dq;

	// Check the relative absolute cosine because 
	// we want to check orientation similarity.
	const scalar kCosTol = scalar(0.995);

	if (b3Abs(q.s) > kCosTol)
	{
		b3BuildFaceContact(manifold, xf1, index1, s1, xf2, s2, flipNormal);
	}
}

static void b3CollideCache(b3Manifold& manifold,
	const b3Transform& xf1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2,
	b3FeatureCache* cache)
{
	B3_ASSERT(cache->m_featurePair.state == b3SATCacheType::e_empty);

	const b3Hull* hull1 = s1->m_hull;
	scalar r1 = s1->m_radius;

	const b3Hull* hull2 = s2->m_hull;
	scalar r2 = s2->m_radius;

	scalar totalRadius = r1 + r2;

	b3FaceQuery faceQuery1 = b3QueryFaceSeparation(xf1, hull1, xf2, hull2);
	if (faceQuery1.separation > totalRadius)
	{
		// Write a separation cache.
		cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_separation, b3SATFeatureType::e_face1, faceQuery1.index, faceQuery1.index);
		return;
	}

	b3FaceQuery faceQuery2 = b3QueryFaceSeparation(xf2, hull2, xf1, hull1);
	if (faceQuery2.separation > totalRadius)
	{
		// Write a separation cache.
		cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_separation, b3SATFeatureType::e_face2, faceQuery2.index, faceQuery2.index);
		return;
	}

	b3EdgeQuery edgeQuery = b3QueryEdgeSeparation(xf1, hull1, xf2, hull2);
	if (edgeQuery.separation > totalRadius)
	{
		// Write a separation cache.
		cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_separation, b3SATFeatureType::e_edge1, edgeQuery.index1, edgeQuery.index2);
		return;
	}

	const scalar kTol = scalar(0.1) * B3_LINEAR_SLOP;
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation) + kTol)
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);

		// Write an overlap cache.		
		cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_edge1, edgeQuery.index1, edgeQuery.index2);
		return;
	}
	else
	{
		if (faceQuery1.separation + kTol > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
			if (manifold.pointCount > 0)
			{
				// Write an overlap cache.
				cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_face1, faceQuery1.index, faceQuery1.index);
				return;
			}
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
			if (manifold.pointCount > 0)
			{
				// Write an overlap cache.
				cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_face2, faceQuery2.index, faceQuery2.index);
				return;
			}
		}
	}

	// Heuristic failed. Fallback.
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation))
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
		// Write an overlap cache.		
		cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_edge1, edgeQuery.index1, edgeQuery.index2);
		return;
	}
	else
	{
		if (faceQuery1.separation > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
			if (manifold.pointCount > 0)
			{
				// Write an overlap cache.
				cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_face1, faceQuery1.index, faceQuery1.index);
				return;
			}
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
			if (manifold.pointCount > 0)
			{
				// Write an overlap cache.
				cache->m_featurePair = b3MakeFeaturePair(b3SATCacheType::e_overlap, b3SATFeatureType::e_face2, faceQuery2.index, faceQuery2.index);
				return;
			}
		}
	}

	// When both convex hulls are not simplified clipping might fail and create no contact points.
	// For example, when a hull contains tiny faces, coplanar faces, and/or non-sharped edges.
	// So we need to use an heuristic to create a correct contact point.
}

static void b3CollideHulls(b3Manifold& manifold,
	const b3Transform& xf1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2,
	b3FeatureCache* cache)
{
	const b3Hull* hull1 = s1->m_hull;
	scalar r1 = s1->m_radius;

	const b3Hull* hull2 = s2->m_hull;
	scalar r2 = s2->m_radius;

	scalar totalRadius = r1 + r2;

	// Read cache
	b3SATCacheType state0 = cache->m_featurePair.state;
	b3SATCacheType state1 = cache->ReadState(xf1, hull1, xf2, hull2, totalRadius);

	if (state0 == b3SATCacheType::e_separation &&
		state1 == b3SATCacheType::e_separation)
	{
		// Separation cache hit.
		++b3_convexCacheHits;
		return;
	}

	if (state0 == b3SATCacheType::e_overlap &&
		state1 == b3SATCacheType::e_overlap)
	{
		// Try to rebuild or reclip the features.
		switch (cache->m_featurePair.type)
		{
		case b3SATFeatureType::e_edge1:
		{
			b3RebuildEdgeContact(manifold, xf1, cache->m_featurePair.index1, s1, xf2, cache->m_featurePair.index2, s2);
			break;
		}
		case b3SATFeatureType::e_face1:
		{
			b3RebuildFaceContact(manifold, xf1, cache->m_featurePair.index1, s1, xf2, s2, false);
			break;
		}
		case b3SATFeatureType::e_face2:
		{
			b3RebuildFaceContact(manifold, xf2, cache->m_featurePair.index1, s2, xf1, s1, true);
			break;
		}
		default:
		{
			break;
		}
		}

		if (manifold.pointCount > 0)
		{
			// Overlap cache hit.
			++b3_convexCacheHits;
			return;
		}
	}

	// Separation cache miss.
	// Overlap cache miss.
	// Flush the cache.
	cache->m_featurePair.state = b3SATCacheType::e_empty;
	b3CollideCache(manifold, xf1, s1, xf2, s2, cache);
}