mirror of
https://git.code.sf.net/p/quake/quakeforge
synced 2024-11-26 22:31:05 +00:00
[sw] Speed up Draw_FadeScreen
It now processes 4 pixels at a time and uses a bit mask instead of a conditional to set 3 of the 4 pixels to black. On top of the 4:1 pixel processing and the avoidance of inner-loop conditional jumps, gcc unrolls the loop, so Draw_FadeScreen itself is more than 4x as fast as it was. The end result is about a 5% (3 fps) speedup in timedemo demo1 on my 900MHz Eee PC when nq has been hacked to always draw the fade-screen.
This commit is contained in:
parent
ef6dd422e5
commit
4b1eb0d760
1 changed file with 9 additions and 8 deletions
|
@@ -777,21 +777,22 @@ void
|
||||||
Draw_FadeScreen (void)
|
Draw_FadeScreen (void)
|
||||||
{
|
{
|
||||||
int x, y;
|
int x, y;
|
||||||
byte *pbuf;
|
int height = vid.conheight;
|
||||||
|
int width = vid.conwidth / 4;
|
||||||
|
uint32_t *pbuf;
|
||||||
|
|
||||||
VID_UnlockBuffer ();
|
VID_UnlockBuffer ();
|
||||||
S_ExtraUpdate ();
|
S_ExtraUpdate ();
|
||||||
VID_LockBuffer ();
|
VID_LockBuffer ();
|
||||||
|
|
||||||
for (y = 0; y < vid.conheight; y++) {
|
for (y = 0; y < height; y++) {
|
||||||
unsigned int t;
|
uint32_t mask;
|
||||||
|
|
||||||
pbuf = ((byte *)vid.buffer) + vid.rowbytes * y;
|
pbuf = (uint32_t *) ((byte *)vid.buffer + vid.rowbytes * y);
|
||||||
t = (y & 1) << 1;
|
mask = 0xff << ((y & 1) << 4);
|
||||||
|
|
||||||
for (x = 0; x < vid.conwidth; x++) {
|
for (x = 0; x < width; x++) {
|
||||||
if ((x & 3) != t)
|
*pbuf++ &= mask;
|
||||||
pbuf[x] = 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vr_data.scr_copyeverything = 1;
|
vr_data.scr_copyeverything = 1;
|
||||||
|
|
Loading…
Reference in a new issue