* Replaced drawSurfs qsort algorithm with a radix sort

+ Performance is comparable, although generally slightly
    worse, but...
  + Radix sort is a stable sort algorithm, so overlapping
    coplanar drawSurfs (i.e. with the same sort key) no
    longer flicker indeterminately
  + Also removes the dubious comment: "FIXME: this was 
    lifted and modified from the microsoft lib source..."
This commit is contained in:
Tim Angus 2005-10-10 00:37:54 +00:00
parent b1ea2ed136
commit 3857ce21c4
1 changed file with 36 additions and 191 deletions

View File

@@ -1000,205 +1000,50 @@ DRAWSURF SORTING
*/ */
/* /*
================= ===============
qsort replacement R_Radix
===============
=================
*/ */
static __inline void SWAP_DRAW_SURF(drawSurf_t* a, drawSurf_t* b) static __inline void R_Radix( int byte, int size, drawSurf_t *source, drawSurf_t *dest )
{ {
drawSurf_t t; int count[ 256 ] = { 0 };
memcpy(&t, a, sizeof(t)); int index[ 256 ];
memcpy(a, b, sizeof(t)); int i;
memcpy(b, &t, sizeof(t)); unsigned char *sortKey = NULL;
unsigned char *end = NULL;
sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
end = sortKey + ( size * sizeof( drawSurf_t ) );
for( ; sortKey < end; sortKey += sizeof( drawSurf_t ) )
++count[ *sortKey ];
index[ 0 ] = 0;
for( i = 1; i < 256; ++i )
index[ i ] = index[ i - 1 ] + count[ i - 1 ];
sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
for( i = 0; i < size; ++i, sortKey += sizeof( drawSurf_t ) )
dest[ index[ *sortKey ]++ ] = source[ i ];
} }
/* this parameter defines the cutoff between using quick sort and /*
insertion sort for arrays; arrays with lengths shorter or equal to the ===============
below value use insertion sort */ R_RadixSort
#define CUTOFF 8 /* testing shows that this is good value */ Radix sort with 4 byte size buckets
===============
static void shortsort( drawSurf_t *lo, drawSurf_t *hi ) { */
drawSurf_t *p, *max; static void R_RadixSort( drawSurf_t *source, int size )
while (hi > lo) {
max = lo;
for (p = lo + 1; p <= hi; p++ ) {
if ( p->sort > max->sort ) {
max = p;
}
}
SWAP_DRAW_SURF(max, hi);
hi--;
}
}
/* sort the array between lo and hi (inclusive)
FIXME: this was lifted and modified from the microsoft lib source...
*/
void qsortFast (
void *base,
unsigned num,
unsigned width
)
{ {
char *lo, *hi; /* ends of sub-array currently sorting */ static drawSurf_t scratch[ MAX_DRAWSURFS ];
char *mid; /* points to middle of subarray */
char *loguy, *higuy; /* traveling pointers for partition step */
unsigned size; /* size of the sub-array */
char *lostk[30], *histk[30];
int stkptr; /* stack for saving sub-array to be processed */
#if 0 R_Radix( 0, size, source, scratch );
if ( sizeof(drawSurf_t) != 8 ) { R_Radix( 1, size, scratch, source );
ri.Error( ERR_DROP, "change SWAP_DRAW_SURF macro" ); R_Radix( 2, size, source, scratch );
} R_Radix( 3, size, scratch, source );
#endif
/* Note: the number of stack entries required is no more than
1 + log2(size), so 30 is sufficient for any array */
if (num < 2 || width == 0)
return; /* nothing to do */
stkptr = 0; /* initialize stack */
lo = base;
hi = (char *)base + width * (num-1); /* initialize limits */
/* this entry point is for pseudo-recursion calling: setting
lo and hi and jumping to here is like recursion, but stkptr is
prserved, locals aren't, so we preserve stuff on the stack */
recurse:
size = (hi - lo) / width + 1; /* number of el's to sort */
/* below a certain size, it is faster to use a O(n^2) sorting method */
if (size <= CUTOFF) {
shortsort((drawSurf_t *)lo, (drawSurf_t *)hi);
}
else {
/* First we pick a partititioning element. The efficiency of the
algorithm demands that we find one that is approximately the
median of the values, but also that we select one fast. Using
the first one produces bad performace if the array is already
sorted, so we use the middle one, which would require a very
wierdly arranged array for worst case performance. Testing shows
that a median-of-three algorithm does not, in general, increase
performance. */
mid = lo + (size / 2) * width; /* find middle element */
SWAP_DRAW_SURF((drawSurf_t *)mid, (drawSurf_t *)lo); /* swap it to beginning of array */
/* We now wish to partition the array into three pieces, one
consisiting of elements <= partition element, one of elements
equal to the parition element, and one of element >= to it. This
is done below; comments indicate conditions established at every
step. */
loguy = lo;
higuy = hi + width;
/* Note that higuy decreases and loguy increases on every iteration,
so loop must terminate. */
for (;;) {
/* lo <= loguy < hi, lo < higuy <= hi + 1,
A[i] <= A[lo] for lo <= i <= loguy,
A[i] >= A[lo] for higuy <= i <= hi */
do {
loguy += width;
} while (loguy <= hi &&
( ((drawSurf_t *)loguy)->sort <= ((drawSurf_t *)lo)->sort ) );
/* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
either loguy > hi or A[loguy] > A[lo] */
do {
higuy -= width;
} while (higuy > lo &&
( ((drawSurf_t *)higuy)->sort >= ((drawSurf_t *)lo)->sort ) );
/* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
either higuy <= lo or A[higuy] < A[lo] */
if (higuy < loguy)
break;
/* if loguy > hi or higuy <= lo, then we would have exited, so
A[loguy] > A[lo], A[higuy] < A[lo],
loguy < hi, highy > lo */
SWAP_DRAW_SURF((drawSurf_t *)loguy, (drawSurf_t *)higuy);
/* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
of loop is re-established */
}
/* A[i] >= A[lo] for higuy < i <= hi,
A[i] <= A[lo] for lo <= i < loguy,
higuy < loguy, lo <= higuy <= hi
implying:
A[i] >= A[lo] for loguy <= i <= hi,
A[i] <= A[lo] for lo <= i <= higuy,
A[i] = A[lo] for higuy < i < loguy */
SWAP_DRAW_SURF((drawSurf_t *)lo, (drawSurf_t *)higuy); /* put partition element in place */
/* OK, now we have the following:
A[i] >= A[higuy] for loguy <= i <= hi,
A[i] <= A[higuy] for lo <= i < higuy
A[i] = A[lo] for higuy <= i < loguy */
/* We've finished the partition, now we want to sort the subarrays
[lo, higuy-1] and [loguy, hi].
We do the smaller one first to minimize stack usage.
We only sort arrays of length 2 or more.*/
if ( higuy - 1 - lo >= hi - loguy ) {
if (lo + width < higuy) {
lostk[stkptr] = lo;
histk[stkptr] = higuy - width;
++stkptr;
} /* save big recursion for later */
if (loguy < hi) {
lo = loguy;
goto recurse; /* do small recursion */
}
}
else {
if (loguy < hi) {
lostk[stkptr] = loguy;
histk[stkptr] = hi;
++stkptr; /* save big recursion for later */
}
if (lo + width < higuy) {
hi = higuy - width;
goto recurse; /* do small recursion */
}
}
}
/* We have sorted the array, except for any pending sorts on the stack.
Check if there are any, and do them. */
--stkptr;
if (stkptr >= 0) {
lo = lostk[stkptr];
hi = histk[stkptr];
goto recurse; /* pop subarray from stack */
}
else
return; /* all subarrays done */
} }
//========================================================================================== //==========================================================================================
/* /*
@@ -1261,7 +1106,7 @@ void R_SortDrawSurfs( drawSurf_t *drawSurfs, int numDrawSurfs ) {
} }
// sort the drawsurfs by sort type, then orientation, then shader // sort the drawsurfs by sort type, then orientation, then shader
qsortFast (drawSurfs, numDrawSurfs, sizeof(drawSurf_t) ); R_RadixSort( drawSurfs, numDrawSurfs );
// check for any pass through drawing, which // check for any pass through drawing, which
// may cause another view to be rendered first // may cause another view to be rendered first