From 3857ce21c445b39f94329a4e37f388e94b66bd98 Mon Sep 17 00:00:00 2001 From: Tim Angus Date: Mon, 10 Oct 2005 00:37:54 +0000 Subject: [PATCH] * Replaced drawSurfs qsort algorithm with a radix sort + Performance is comparable, although generally slightly worse, but... + Radix is a stable sort algorithm, so overlapping coplanar drawSurfs (i.e. with the same sort key) no longer flicker indeterminately + Also removes the dubious comment: "FIXME: this was lifted and modified from the microsoft lib source..." --- code/renderer/tr_main.c | 227 +++++++--------------------------------- 1 file changed, 36 insertions(+), 191 deletions(-) diff --git a/code/renderer/tr_main.c b/code/renderer/tr_main.c index 187e3519..7611eaf3 100644 --- a/code/renderer/tr_main.c +++ b/code/renderer/tr_main.c @@ -1000,205 +1000,50 @@ DRAWSURF SORTING */ /* -================= -qsort replacement - -================= +=============== +R_Radix +=============== */ -static __inline void SWAP_DRAW_SURF(drawSurf_t* a, drawSurf_t* b) +static __inline void R_Radix( int byte, int size, drawSurf_t *source, drawSurf_t *dest ) { - drawSurf_t t; - memcpy(&t, a, sizeof(t)); - memcpy(a, b, sizeof(t)); - memcpy(b, &t, sizeof(t)); + int count[ 256 ] = { 0 }; + int index[ 256 ]; + int i; + unsigned char *sortKey = NULL; + unsigned char *end = NULL; + + sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte; + end = sortKey + ( size * sizeof( drawSurf_t ) ); + for( ; sortKey < end; sortKey += sizeof( drawSurf_t ) ) + ++count[ *sortKey ]; + + index[ 0 ] = 0; + + for( i = 1; i < 256; ++i ) + index[ i ] = index[ i - 1 ] + count[ i - 1 ]; + + sortKey = ( (unsigned char *)&source[ 0 ].sort ) + byte; + for( i = 0; i < size; ++i, sortKey += sizeof( drawSurf_t ) ) + dest[ index[ *sortKey ]++ ] = source[ i ]; } -/* this parameter defines the cutoff between using quick sort and - insertion sort for arrays; arrays with lengths shorter or equal to the - below value use insertion sort */ +/* +=============== +R_RadixSort 
-#define CUTOFF 8 /* testing shows that this is good value */ - -static void shortsort( drawSurf_t *lo, drawSurf_t *hi ) { - drawSurf_t *p, *max; - - while (hi > lo) { - max = lo; - for (p = lo + 1; p <= hi; p++ ) { - if ( p->sort > max->sort ) { - max = p; - } - } - SWAP_DRAW_SURF(max, hi); - hi--; - } -} - - -/* sort the array between lo and hi (inclusive) -FIXME: this was lifted and modified from the microsoft lib source... - */ - -void qsortFast ( - void *base, - unsigned num, - unsigned width - ) +LSD radix sort: 4 passes, one key byte (256 buckets) per pass, byte offset 0 first; assumes a little-endian sort key +=============== +*/ +static void R_RadixSort( drawSurf_t *source, int size ) { - char *lo, *hi; /* ends of sub-array currently sorting */ - char *mid; /* points to middle of subarray */ - char *loguy, *higuy; /* traveling pointers for partition step */ - unsigned size; /* size of the sub-array */ - char *lostk[30], *histk[30]; - int stkptr; /* stack for saving sub-array to be processed */ + static drawSurf_t scratch[ MAX_DRAWSURFS ]; -#if 0 - if ( sizeof(drawSurf_t) != 8 ) { - ri.Error( ERR_DROP, "change SWAP_DRAW_SURF macro" ); - } -#endif - - /* Note: the number of stack entries required is no more than - 1 + log2(size), so 30 is sufficient for any array */ - - if (num < 2 || width == 0) - return; /* nothing to do */ - - stkptr = 0; /* initialize stack */ - - lo = base; - hi = (char *)base + width * (num-1); /* initialize limits */ - - /* this entry point is for pseudo-recursion calling: setting - lo and hi and jumping to here is like recursion, but stkptr is - prserved, locals aren't, so we preserve stuff on the stack */ -recurse: - - size = (hi - lo) / width + 1; /* number of el's to sort */ - - /* below a certain size, it is faster to use a O(n^2) sorting method */ - if (size <= CUTOFF) { - shortsort((drawSurf_t *)lo, (drawSurf_t *)hi); - } - else { - /* First we pick a partititioning element. 
The efficiency of the - algorithm demands that we find one that is approximately the - median of the values, but also that we select one fast. Using - the first one produces bad performace if the array is already - sorted, so we use the middle one, which would require a very - wierdly arranged array for worst case performance. Testing shows - that a median-of-three algorithm does not, in general, increase - performance. */ - - mid = lo + (size / 2) * width; /* find middle element */ - SWAP_DRAW_SURF((drawSurf_t *)mid, (drawSurf_t *)lo); /* swap it to beginning of array */ - - - /* We now wish to partition the array into three pieces, one - consisiting of elements <= partition element, one of elements - equal to the parition element, and one of element >= to it. This - is done below; comments indicate conditions established at every - step. */ - - loguy = lo; - higuy = hi + width; - - /* Note that higuy decreases and loguy increases on every iteration, - so loop must terminate. */ - for (;;) { - /* lo <= loguy < hi, lo < higuy <= hi + 1, - A[i] <= A[lo] for lo <= i <= loguy, - A[i] >= A[lo] for higuy <= i <= hi */ - - do { - loguy += width; - } while (loguy <= hi && - ( ((drawSurf_t *)loguy)->sort <= ((drawSurf_t *)lo)->sort ) ); - - /* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy, - either loguy > hi or A[loguy] > A[lo] */ - - do { - higuy -= width; - } while (higuy > lo && - ( ((drawSurf_t *)higuy)->sort >= ((drawSurf_t *)lo)->sort ) ); - - /* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi, - either higuy <= lo or A[higuy] < A[lo] */ - - if (higuy < loguy) - break; - - /* if loguy > hi or higuy <= lo, then we would have exited, so - A[loguy] > A[lo], A[higuy] < A[lo], - loguy < hi, highy > lo */ - - SWAP_DRAW_SURF((drawSurf_t *)loguy, (drawSurf_t *)higuy); - - /* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top - of loop is re-established */ - } - - /* A[i] >= A[lo] for higuy < i <= hi, - A[i] <= A[lo] for lo <= i < loguy, - higuy < loguy, 
lo <= higuy <= hi - implying: - A[i] >= A[lo] for loguy <= i <= hi, - A[i] <= A[lo] for lo <= i <= higuy, - A[i] = A[lo] for higuy < i < loguy */ - - SWAP_DRAW_SURF((drawSurf_t *)lo, (drawSurf_t *)higuy); /* put partition element in place */ - - /* OK, now we have the following: - A[i] >= A[higuy] for loguy <= i <= hi, - A[i] <= A[higuy] for lo <= i < higuy - A[i] = A[lo] for higuy <= i < loguy */ - - /* We've finished the partition, now we want to sort the subarrays - [lo, higuy-1] and [loguy, hi]. - We do the smaller one first to minimize stack usage. - We only sort arrays of length 2 or more.*/ - - if ( higuy - 1 - lo >= hi - loguy ) { - if (lo + width < higuy) { - lostk[stkptr] = lo; - histk[stkptr] = higuy - width; - ++stkptr; - } /* save big recursion for later */ - - if (loguy < hi) { - lo = loguy; - goto recurse; /* do small recursion */ - } - } - else { - if (loguy < hi) { - lostk[stkptr] = loguy; - histk[stkptr] = hi; - ++stkptr; /* save big recursion for later */ - } - - if (lo + width < higuy) { - hi = higuy - width; - goto recurse; /* do small recursion */ - } - } - } - - /* We have sorted the array, except for any pending sorts on the stack. - Check if there are any, and do them. */ - - --stkptr; - if (stkptr >= 0) { - lo = lostk[stkptr]; - hi = histk[stkptr]; - goto recurse; /* pop subarray from stack */ - } - else - return; /* all subarrays done */ + R_Radix( 0, size, source, scratch ); + R_Radix( 1, size, scratch, source ); + R_Radix( 2, size, source, scratch ); + R_Radix( 3, size, scratch, source ); } - //========================================================================================== /* @@ -1261,7 +1106,7 @@ void R_SortDrawSurfs( drawSurf_t *drawSurfs, int numDrawSurfs ) { } // sort the drawsurfs by sort type, then orientation, then shader - qsortFast (drawSurfs, numDrawSurfs, sizeof(drawSurf_t) ); + R_RadixSort( drawSurfs, numDrawSurfs ); // check for any pass through drawing, which // may cause another view to be rendered first