mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-26 14:20:59 +00:00
[sw] Speed up Draw_FadeScreen
It now processes 4 pixels at a time and uses a bit mask instead of a conditional to set 3 of the 4 pixels to black. In addition to the 4:1 pixel processing and the removal of inner-loop conditional jumps, gcc unrolls the loop, so Draw_FadeScreen itself is more than 4x as fast as it was. The end result is about a 5% (3fps) speedup in timedemo demo1 on my 900MHz Eee PC when nq has been hacked to always draw the fade-screen.
This commit is contained in:
parent
ef6dd422e5
commit
4b1eb0d760
1 changed file with 9 additions and 8 deletions
|
@ -777,21 +777,22 @@ void
|
|||
Draw_FadeScreen (void)
|
||||
{
|
||||
int x, y;
|
||||
byte *pbuf;
|
||||
int height = vid.conheight;
|
||||
int width = vid.conwidth / 4;
|
||||
uint32_t *pbuf;
|
||||
|
||||
VID_UnlockBuffer ();
|
||||
S_ExtraUpdate ();
|
||||
VID_LockBuffer ();
|
||||
|
||||
for (y = 0; y < vid.conheight; y++) {
|
||||
unsigned int t;
|
||||
for (y = 0; y < height; y++) {
|
||||
uint32_t mask;
|
||||
|
||||
pbuf = ((byte *)vid.buffer) + vid.rowbytes * y;
|
||||
t = (y & 1) << 1;
|
||||
pbuf = (uint32_t *) ((byte *)vid.buffer + vid.rowbytes * y);
|
||||
mask = 0xff << ((y & 1) << 4);
|
||||
|
||||
for (x = 0; x < vid.conwidth; x++) {
|
||||
if ((x & 3) != t)
|
||||
pbuf[x] = 0;
|
||||
for (x = 0; x < width; x++) {
|
||||
*pbuf++ &= mask;
|
||||
}
|
||||
}
|
||||
vr_data.scr_copyeverything = 1;
|
||||
|
|
Loading…
Reference in a new issue