mirror of
https://github.com/UberGames/ioef.git
synced 2024-11-30 16:01:46 +00:00
* Replaced drawSurfs qsort algorithm with a radix sort
+ Performance is comparable, although generally slightly worse, but...
+ Radix is a stable sort algorithm, so overlapping coplanar drawSurfs (i.e. with the same sort key) no longer flicker indeterminately
+ Also removes the dubious comment: "FIXME: this was lifted and modified from the microsoft lib source..."
This commit is contained in:
parent
b1ea2ed136
commit
3857ce21c4
1 changed files with 36 additions and 191 deletions
|
@ -1000,205 +1000,50 @@ DRAWSURF SORTING
|
|||
*/
|
||||
|
||||
/*
|
||||
=================
|
||||
qsort replacement
|
||||
|
||||
=================
|
||||
===============
|
||||
R_Radix
|
||||
===============
|
||||
*/
|
||||
static __inline void SWAP_DRAW_SURF(drawSurf_t* a, drawSurf_t* b)
|
||||
static __inline void R_Radix( int byte, int size, drawSurf_t *source, drawSurf_t *dest )
|
||||
{
|
||||
drawSurf_t t;
|
||||
memcpy(&t, a, sizeof(t));
|
||||
memcpy(a, b, sizeof(t));
|
||||
memcpy(b, &t, sizeof(t));
|
||||
int count[ 256 ] = { 0 };
|
||||
int index[ 256 ];
|
||||
int i;
|
||||
unsigned char *sortKey = NULL;
|
||||
unsigned char *end = NULL;
|
||||
|
||||
sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
|
||||
end = sortKey + ( size * sizeof( drawSurf_t ) );
|
||||
for( ; sortKey < end; sortKey += sizeof( drawSurf_t ) )
|
||||
++count[ *sortKey ];
|
||||
|
||||
index[ 0 ] = 0;
|
||||
|
||||
for( i = 1; i < 256; ++i )
|
||||
index[ i ] = index[ i - 1 ] + count[ i - 1 ];
|
||||
|
||||
sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte;
|
||||
for( i = 0; i < size; ++i, sortKey += sizeof( drawSurf_t ) )
|
||||
dest[ index[ *sortKey ]++ ] = source[ i ];
|
||||
}
|
||||
|
||||
/* this parameter defines the cutoff between using quick sort and
|
||||
insertion sort for arrays; arrays with lengths shorter or equal to the
|
||||
below value use insertion sort */
|
||||
/*
|
||||
===============
|
||||
R_RadixSort
|
||||
|
||||
#define CUTOFF 8 /* testing shows that this is good value */
|
||||
|
||||
static void shortsort( drawSurf_t *lo, drawSurf_t *hi ) {
|
||||
drawSurf_t *p, *max;
|
||||
|
||||
while (hi > lo) {
|
||||
max = lo;
|
||||
for (p = lo + 1; p <= hi; p++ ) {
|
||||
if ( p->sort > max->sort ) {
|
||||
max = p;
|
||||
}
|
||||
}
|
||||
SWAP_DRAW_SURF(max, hi);
|
||||
hi--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* sort the array between lo and hi (inclusive)
|
||||
FIXME: this was lifted and modified from the microsoft lib source...
|
||||
*/
|
||||
|
||||
void qsortFast (
|
||||
void *base,
|
||||
unsigned num,
|
||||
unsigned width
|
||||
)
|
||||
Radix sort in 4 passes, bucketing on one byte of the sort key per pass
|
||||
===============
|
||||
*/
|
||||
static void R_RadixSort( drawSurf_t *source, int size )
|
||||
{
|
||||
char *lo, *hi; /* ends of sub-array currently sorting */
|
||||
char *mid; /* points to middle of subarray */
|
||||
char *loguy, *higuy; /* traveling pointers for partition step */
|
||||
unsigned size; /* size of the sub-array */
|
||||
char *lostk[30], *histk[30];
|
||||
int stkptr; /* stack for saving sub-array to be processed */
|
||||
static drawSurf_t scratch[ MAX_DRAWSURFS ];
|
||||
|
||||
#if 0
|
||||
if ( sizeof(drawSurf_t) != 8 ) {
|
||||
ri.Error( ERR_DROP, "change SWAP_DRAW_SURF macro" );
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Note: the number of stack entries required is no more than
|
||||
1 + log2(size), so 30 is sufficient for any array */
|
||||
|
||||
if (num < 2 || width == 0)
|
||||
return; /* nothing to do */
|
||||
|
||||
stkptr = 0; /* initialize stack */
|
||||
|
||||
lo = base;
|
||||
hi = (char *)base + width * (num-1); /* initialize limits */
|
||||
|
||||
/* this entry point is for pseudo-recursion calling: setting
|
||||
lo and hi and jumping to here is like recursion, but stkptr is
|
||||
preserved, locals aren't, so we preserve stuff on the stack */
|
||||
recurse:
|
||||
|
||||
size = (hi - lo) / width + 1; /* number of el's to sort */
|
||||
|
||||
/* below a certain size, it is faster to use a O(n^2) sorting method */
|
||||
if (size <= CUTOFF) {
|
||||
shortsort((drawSurf_t *)lo, (drawSurf_t *)hi);
|
||||
}
|
||||
else {
|
||||
/* First we pick a partitioning element. The efficiency of the
|
||||
algorithm demands that we find one that is approximately the
|
||||
median of the values, but also that we select one fast. Using
|
||||
the first one produces bad performance if the array is already
|
||||
sorted, so we use the middle one, which would require a very
|
||||
weirdly arranged array for worst case performance. Testing shows
|
||||
that a median-of-three algorithm does not, in general, increase
|
||||
performance. */
|
||||
|
||||
mid = lo + (size / 2) * width; /* find middle element */
|
||||
SWAP_DRAW_SURF((drawSurf_t *)mid, (drawSurf_t *)lo); /* swap it to beginning of array */
|
||||
|
||||
|
||||
/* We now wish to partition the array into three pieces, one
|
||||
consisting of elements <= partition element, one of elements
|
||||
equal to the partition element, and one of elements >= to it. This
|
||||
is done below; comments indicate conditions established at every
|
||||
step. */
|
||||
|
||||
loguy = lo;
|
||||
higuy = hi + width;
|
||||
|
||||
/* Note that higuy decreases and loguy increases on every iteration,
|
||||
so loop must terminate. */
|
||||
for (;;) {
|
||||
/* lo <= loguy < hi, lo < higuy <= hi + 1,
|
||||
A[i] <= A[lo] for lo <= i <= loguy,
|
||||
A[i] >= A[lo] for higuy <= i <= hi */
|
||||
|
||||
do {
|
||||
loguy += width;
|
||||
} while (loguy <= hi &&
|
||||
( ((drawSurf_t *)loguy)->sort <= ((drawSurf_t *)lo)->sort ) );
|
||||
|
||||
/* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
|
||||
either loguy > hi or A[loguy] > A[lo] */
|
||||
|
||||
do {
|
||||
higuy -= width;
|
||||
} while (higuy > lo &&
|
||||
( ((drawSurf_t *)higuy)->sort >= ((drawSurf_t *)lo)->sort ) );
|
||||
|
||||
/* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
|
||||
either higuy <= lo or A[higuy] < A[lo] */
|
||||
|
||||
if (higuy < loguy)
|
||||
break;
|
||||
|
||||
/* if loguy > hi or higuy <= lo, then we would have exited, so
|
||||
A[loguy] > A[lo], A[higuy] < A[lo],
|
||||
loguy < hi, higuy > lo */
|
||||
|
||||
SWAP_DRAW_SURF((drawSurf_t *)loguy, (drawSurf_t *)higuy);
|
||||
|
||||
/* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
|
||||
of loop is re-established */
|
||||
}
|
||||
|
||||
/* A[i] >= A[lo] for higuy < i <= hi,
|
||||
A[i] <= A[lo] for lo <= i < loguy,
|
||||
higuy < loguy, lo <= higuy <= hi
|
||||
implying:
|
||||
A[i] >= A[lo] for loguy <= i <= hi,
|
||||
A[i] <= A[lo] for lo <= i <= higuy,
|
||||
A[i] = A[lo] for higuy < i < loguy */
|
||||
|
||||
SWAP_DRAW_SURF((drawSurf_t *)lo, (drawSurf_t *)higuy); /* put partition element in place */
|
||||
|
||||
/* OK, now we have the following:
|
||||
A[i] >= A[higuy] for loguy <= i <= hi,
|
||||
A[i] <= A[higuy] for lo <= i < higuy
|
||||
A[i] = A[lo] for higuy <= i < loguy */
|
||||
|
||||
/* We've finished the partition, now we want to sort the subarrays
|
||||
[lo, higuy-1] and [loguy, hi].
|
||||
We do the smaller one first to minimize stack usage.
|
||||
We only sort arrays of length 2 or more.*/
|
||||
|
||||
if ( higuy - 1 - lo >= hi - loguy ) {
|
||||
if (lo + width < higuy) {
|
||||
lostk[stkptr] = lo;
|
||||
histk[stkptr] = higuy - width;
|
||||
++stkptr;
|
||||
} /* save big recursion for later */
|
||||
|
||||
if (loguy < hi) {
|
||||
lo = loguy;
|
||||
goto recurse; /* do small recursion */
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (loguy < hi) {
|
||||
lostk[stkptr] = loguy;
|
||||
histk[stkptr] = hi;
|
||||
++stkptr; /* save big recursion for later */
|
||||
}
|
||||
|
||||
if (lo + width < higuy) {
|
||||
hi = higuy - width;
|
||||
goto recurse; /* do small recursion */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* We have sorted the array, except for any pending sorts on the stack.
|
||||
Check if there are any, and do them. */
|
||||
|
||||
--stkptr;
|
||||
if (stkptr >= 0) {
|
||||
lo = lostk[stkptr];
|
||||
hi = histk[stkptr];
|
||||
goto recurse; /* pop subarray from stack */
|
||||
}
|
||||
else
|
||||
return; /* all subarrays done */
|
||||
R_Radix( 0, size, source, scratch );
|
||||
R_Radix( 1, size, scratch, source );
|
||||
R_Radix( 2, size, source, scratch );
|
||||
R_Radix( 3, size, scratch, source );
|
||||
}
|
||||
|
||||
|
||||
//==========================================================================================
|
||||
|
||||
/*
|
||||
|
@ -1261,7 +1106,7 @@ void R_SortDrawSurfs( drawSurf_t *drawSurfs, int numDrawSurfs ) {
|
|||
}
|
||||
|
||||
// sort the drawsurfs by sort type, then orientation, then shader
|
||||
qsortFast (drawSurfs, numDrawSurfs, sizeof(drawSurf_t) );
|
||||
R_RadixSort( drawSurfs, numDrawSurfs );
|
||||
|
||||
// check for any pass through drawing, which
|
||||
// may cause another view to be rendered first
|
||||
|
|
Loading…
Reference in a new issue