- The DSimpleCanvas constructor now fills MemBuffer with zeros.

- Fixed: If the FBTexture wasn't exactly the same size as the screen,
  D3DFB::PaintToWindow() would still lock it with D3DLOCK_DISCARD. Alas,
  I saw no speedup for using a dirty region. (Side note: The Radeons are
  apparently slower compared to DirectDraw because they must do
  power-of-2 textures. If they ever add non-power-of-2 support like nvidia,
  I assume they will also see a speed gain.)
- Changed fb_d3d9.cpp so that instead of trying to compensate for Geforce
  off-by-one errors in the pixel shader, it automatically detects where
  the error occurs and modifies the way the palette is uploaded to
  compensate. Palette color 255 is then represented using the texture
  border color instead of actually being part of the palette. This should
  work correctly with all cards, since I had a report of an FX where the
  off-by-one occurred in a different spot from the place where I observed
  it on 6 and 7 series cards. Since the shader now has one fewer
  instruction, I notice a very marginal speedup. (Interestingly, removing
  the flash blending from the shader had no perceivable performance gain.)


SVN r399 (trunk)
This commit is contained in:
Randy Heit 2006-12-01 01:17:45 +00:00
parent 37e8533773
commit 2dcc70dd31
5 changed files with 328 additions and 103 deletions

View file

@ -1,3 +1,22 @@
November 30, 2006
- The DSimpleCanvas constructor now fills MemBuffer with zeros.
- Fixed: If the FBTexture wasn't exactly the same size as the screen,
D3DFB::PaintToWindow() would still lock it with D3DLOCK_DISCARD. Alas,
I saw no speedup for using a dirty region. (Side note: The Radeons are
apparently slower compared to DirectDraw because they must do
power-of-2 textures. If they ever add non-power-of-2 support like nvidia,
I assume they will also see a speed gain.)
- Changed fb_d3d9.cpp so that instead of trying to compensate for Geforce
off-by-one errors in the pixel shader, it automatically detects where
the error occurs and modifies the way the palette is uploaded to
compensate. Palette color 255 is then represented using the texture
border color instead of actually being part of the palette. This should
work correctly with all cards, since I had a report of an FX where the
off-by-one occurred in a different spot from the place where I observed
it on 6 and 7 series cards. Since the shader now has one fewer
instruction, I notice a very marginal speedup. (Interestingly, removing
the flash blending from the shader had no perceivable performance gain.)
November 29, 2006 (Changes by Graf Zahl) November 29, 2006 (Changes by Graf Zahl)
- Fixed: The DECORATE expression evaluator evaluated operators of same - Fixed: The DECORATE expression evaluator evaluated operators of same
precedence right to left instead of left to right. precedence right to left instead of left to right.
@ -21,6 +40,9 @@ November 28, 2006 (Changes by Graf Zahl)
November 28, 2006 November 28, 2006
- Started adding action function declarations to objects. - Started adding action function declarations to objects.
- Added integer constant declarations to objects. - Added integer constant declarations to objects.
- Added some new token-based functions to sc_man.cpp that know about keywords
and record proper type information, so parsers don't need to treat
everything as strings.
- Added a simple symbol table to PClass. - Added a simple symbol table to PClass.
November 27, 2006 (Changes by Graf Zahl) November 27, 2006 (Changes by Graf Zahl)

View file

@ -2598,6 +2598,8 @@ static void ActorActionDef (AActor *defaults, Baggage &bag)
SC_MustGetToken(TK_Identifier); SC_MustGetToken(TK_Identifier);
funcname = sc_Name; funcname = sc_Name;
SC_MustGetToken('('); SC_MustGetToken('(');
if (!SC_CheckToken(')'))
{
while (sc_TokenType != ')') while (sc_TokenType != ')')
{ {
int flags = 0; int flags = 0;
@ -2626,16 +2628,14 @@ static void ActorActionDef (AActor *defaults, Baggage &bag)
break; break;
} }
} }
if (flags != 0)
{
SC_MustGetAnyToken();
}
switch (sc_TokenType) switch (sc_TokenType)
{ {
case TK_Bool: type = 'i'; break;
case TK_Int: type = 'i'; break; case TK_Int: type = 'i'; break;
case TK_Float: type = 'f'; break; case TK_Float: type = 'f'; break;
case TK_Sound: type = 's'; break; case TK_Sound: type = 's'; break;
case TK_String: type = 't'; break; case TK_String: type = 't'; break;
case TK_Name: type = 't'; break;
case TK_State: type = 'l'; break; case TK_State: type = 'l'; break;
case TK_Color: type = 'c'; break; case TK_Color: type = 'c'; break;
case TK_Class: case TK_Class:
@ -2670,9 +2670,9 @@ static void ActorActionDef (AActor *defaults, Baggage &bag)
type -= 'a' - 'A'; type -= 'a' - 'A';
break; break;
} }
#undef OPTIONAL #undef OPTIONAL
#undef EVAL #undef EVAL
#undef EVALNOT #undef EVALNOT
args += type; args += type;
SC_MustGetAnyToken(); SC_MustGetAnyToken();
if (sc_TokenType != ',' && sc_TokenType != ')') if (sc_TokenType != ',' && sc_TokenType != ')')
@ -2680,6 +2680,8 @@ static void ActorActionDef (AActor *defaults, Baggage &bag)
SC_ScriptError ("Expected ',' or ')' but got %s instead", SC_TokenName(sc_TokenType, sc_String).GetChars()); SC_ScriptError ("Expected ',' or ')' but got %s instead", SC_TokenName(sc_TokenType, sc_String).GetChars());
} }
} }
}
SC_MustGetToken(';');
PSymbolActionFunction *sym = new PSymbolActionFunction; PSymbolActionFunction *sym = new PSymbolActionFunction;
sym->SymbolName = funcname; sym->SymbolName = funcname;
sym->SymbolType = SYM_ActionFunction; sym->SymbolType = SYM_ActionFunction;

View file

@ -595,6 +595,7 @@ DSimpleCanvas::DSimpleCanvas (int width, int height)
} }
} }
MemBuffer = new BYTE[Pitch * height]; MemBuffer = new BYTE[Pitch * height];
memset (MemBuffer, 0, Pitch * height);
} }
DSimpleCanvas::~DSimpleCanvas () DSimpleCanvas::~DSimpleCanvas ()

View file

@ -116,20 +116,12 @@ float4 InvFlash : register(c1);
float4 main (float2 texCoord : TEXCOORD0) : COLOR float4 main (float2 texCoord : TEXCOORD0) : COLOR
{ {
half4 index = tex2D (Image, texCoord); float4 index = tex2D (Image, texCoord);
// For some reason, this adjustment is needed on NVidia hardware.
// If this is not done, then all palette index >=240 look up
// palette index + 1. ATI behaves as expacted and does not need
// this adjustment. Fortunately, this produces correct results
// on both hardware with no perceptible performance impact, so
// I only need to use one shader.
index.x = clamp(index.x - 7.65931418e-6, 0.0, 1.0);
float4 rgb = tex2D (Palette, index); float4 rgb = tex2D (Palette, index);
return Flash + rgb * InvFlash; return Flash + rgb * InvFlash;
} }
#endif
#if 0
// //
// Generated by Microsoft (R) D3DX9 Shader Compiler 9.15.779.0000 // Generated by Microsoft (R) D3DX9 Shader Compiler 9.15.779.0000
// //
@ -155,14 +147,12 @@ float4 main (float2 texCoord : TEXCOORD0) : COLOR
// //
ps_1_4 ps_1_4
def c2, -7.65931418e-006, 0, 0, 0
texld r0, t0 texld r0, t0
add_sat r0.x, r0.x, c2.x
phase phase
texld r1, r0 texld r1, r0
mad r0, r1, c1, c0 mad r0, r1, c1, c0
// approximately 4 instruction slots used (2 texture, 2 arithmetic) // approximately 3 instruction slots used (2 texture, 1 arithmetic)
#endif #endif
const DWORD PalTexShaderDef[] = const DWORD PalTexShaderDef[] =
@ -177,10 +167,9 @@ const DWORD PalTexShaderDef[] =
0x46766e49, 0x6873616c, 0x6c615000, 0x65747465, 0x5f737000, 0x00345f31, 0x46766e49, 0x6873616c, 0x6c615000, 0x65747465, 0x5f737000, 0x00345f31,
0x7263694d, 0x666f736f, 0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x7263694d, 0x666f736f, 0x52282074, 0x33442029, 0x20395844, 0x64616853,
0x43207265, 0x69706d6f, 0x2072656c, 0x35312e39, 0x3937372e, 0x3030302e, 0x43207265, 0x69706d6f, 0x2072656c, 0x35312e39, 0x3937372e, 0x3030302e,
0xabab0030, 0x00000051, 0xa00f0002, 0xb7008081, 0x00000000, 0x00000000, 0xabab0030, 0x00000042, 0x800f0000, 0xb0e40000, 0x0000fffd, 0x00000042,
0x00000000, 0x00000042, 0x800f0000, 0xb0e40000, 0x00000002, 0x80110000, 0x800f0001, 0x80e40000, 0x00000004, 0x800f0000, 0x80e40001, 0xa0e40001,
0x80000000, 0xa0000002, 0x0000fffd, 0x00000042, 0x800f0001, 0x80e40000, 0xa0e40000, 0x0000ffff
0x00000004, 0x800f0000, 0x80e40001, 0xa0e40001, 0xa0e40000, 0x0000ffff
}; };
// PUBLIC DATA DEFINITIONS ------------------------------------------------- // PUBLIC DATA DEFINITIONS -------------------------------------------------
@ -201,6 +190,7 @@ D3DFB::D3DFB (int width, int height, bool fullscreen)
FBFormat = D3DFMT_UNKNOWN; FBFormat = D3DFMT_UNKNOWN;
PalFormat = D3DFMT_UNKNOWN; PalFormat = D3DFMT_UNKNOWN;
VSync = vid_vsync; VSync = vid_vsync;
OffByOneAt = -1;
Gamma = 1.0; Gamma = 1.0;
memset (FlashConstants, 0, sizeof(FlashConstants)); memset (FlashConstants, 0, sizeof(FlashConstants));
@ -258,10 +248,6 @@ D3DFB::D3DFB (int width, int height, bool fullscreen)
if (D3DDevice != NULL) if (D3DDevice != NULL)
{ {
CreateResources (); CreateResources ();
D3DDevice->Clear (0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0,0,0), 1.f, 0);
D3DDevice->BeginScene();
D3DDevice->EndScene();
D3DDevice->Present(NULL, NULL, NULL, NULL);
} }
} }
@ -270,12 +256,7 @@ D3DFB::~D3DFB ()
ReleaseResources (); ReleaseResources ();
if (D3DDevice != NULL) if (D3DDevice != NULL)
{ {
// Do not release the D3DDevice in fullscreen mode.
D3DPRESENT_PARAMETERS d3dpp;
FillPresentParameters (&d3dpp, false, true);
//D3DDevice->Reset (&d3dpp);
D3DDevice->Release(); D3DDevice->Release();
//Sleep (1000);
} }
} }
@ -393,6 +374,194 @@ bool D3DFB::Reset ()
return true; return true;
} }
//==========================================================================
//
// DoOffByOneCheck
//
// NVidia hardware has an off-by-one error in the pixel shader.
// On a Geforce 7950GT and a 6200, I have witnessed it skip palette entry
// 240. I have a report that an FX card skips in a totally different spot.
// So rather than try and detect it in the shader, we do it here and
// compensate when uploading the palette and when drawing by setting the
// sampler mode for the palette to border and making the border color the
// final color in the palette.
//
// Interestingly, a Radeon x300 doesn't have this problem. I am curious
// if other ATI hardware is the same.
//
//==========================================================================
void D3DFB::DoOffByOneCheck ()
{
	// Renders the palette indices 0-255 through the palette pixel shader
	// into a 256x1 render target, reads the result back, and records in
	// OffByOneAt the first index that did not come back as itself (256 if
	// every index came back correctly). If any setup step fails, OffByOneAt
	// is left untouched so that the check can be attempted again later.
	IDirect3DSurface9 *savedrendertarget;
	IDirect3DSurface9 *testsurf, *readsurf;
	D3DLOCKED_RECT lockrect;
	RECT testrect = { 0, 0, 256, 1 };
	float texright = 256.f / float(FBWidth);
	float texbot = 1.f / float(FBHeight);
	// A quad covering the 256x1 test strip, shifted by half a pixel
	// (presumably for Direct3D 9's texel-to-pixel alignment).
	FBVERTEX verts[4] =
	{
		{ -0.5f, -0.5f, 0.5f, 1.f, 0.f, 0.f },
		{ 255.5f, -0.5f, 0.5f, 1.f, texright, 0.f },
		{ 255.5f, 0.5f, 0.5f, 1.f, texright, texbot },
		{ -0.5f, 0.5f, 0.5f, 1.f, 0.f, texbot }
	};
	// Shader constants c0 and c1: a flash of 0 and an inverse flash of 1,
	// so the shader's "Flash + rgb * InvFlash" returns the palette color
	// unmodified.
	float flash[2][4] =
	{
		{ 0.f, 0.f, 0.f, 0.f },
		{ 1.f, 1.f, 1.f, 1.f }
	};
	union
	{
		BYTE Pal32[256][4];
		WORD Pal16[256];
	};
	int i, c;

	// The check has already been done; nothing more to do.
	if (OffByOneAt >= 0)
	{
		return;
	}

	// Create an easily recognizable R3G3B2 palette.
	if (PalFormat == D3DFMT_A8R8G8B8)
	{
		for (i = 0; i < 256; ++i)
		{
			Pal32[i][0] = (i & 0x03) << 6;		// blue
			Pal32[i][1] = (i & 0x1C) << 3;		// green
			Pal32[i][2] = (i & 0xE0);			// red
			Pal32[i][3] = 255;
		}
	}
	else
	{
		for (i = 0; i < 256; ++i)
		{
			Pal16[i] = ((i & 0xE0) << 8) |		// red
					   ((i & 0x1C) << 6) |		// green
					   ((i & 0x03) << 3);		// blue
		}
	}

	// Upload the palette
	if (FAILED(PaletteTexture->LockRect (0, &lockrect, NULL, 0)))
	{
		return;
	}
	memcpy (lockrect.pBits, Pal32, 256 * ((PalFormat == D3DFMT_A8R8G8B8) ? 4 : 2));
	PaletteTexture->UnlockRect (0);

	// Prepare a texture with values 0-255.
	if (FAILED(FBTexture->LockRect (0, &lockrect, &testrect, 0)))
	{
		return;
	}
	for (i = 0; i < 256; ++i)
	{
		((BYTE *)lockrect.pBits)[i] = i;
	}
	FBTexture->UnlockRect (0);

	// Create a render target that we can draw it to. GetRenderTarget adds
	// a reference to the surface it returns, so every exit path from here
	// on must release savedrendertarget.
	if (FAILED(D3DDevice->GetRenderTarget (0, &savedrendertarget)))
	{
		return;
	}
	if (FAILED(D3DDevice->CreateRenderTarget (256, 1, PalFormat, D3DMULTISAMPLE_NONE, 0, FALSE, &testsurf, NULL)))
	{
		savedrendertarget->Release();
		return;
	}
	if (FAILED(D3DDevice->CreateOffscreenPlainSurface (256, 1, PalFormat, D3DPOOL_SYSTEMMEM, &readsurf, NULL)))
	{
		testsurf->Release();
		savedrendertarget->Release();
		return;
	}
	if (FAILED(D3DDevice->SetRenderTarget (0, testsurf)))
	{
		testsurf->Release();
		readsurf->Release();
		savedrendertarget->Release();
		return;
	}

	// Write it to the render target using the pixel shader.
	D3DDevice->BeginScene();
	D3DDevice->SetTexture (0, FBTexture);
	D3DDevice->SetTexture (1, PaletteTexture);
	D3DDevice->SetFVF (D3DFVF_FBVERTEX);
	D3DDevice->SetPixelShader (PalTexShader);
	D3DDevice->SetPixelShaderConstantF (0, flash[0], 2);
	D3DDevice->DrawPrimitiveUP (D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX));
	D3DDevice->EndScene();
	D3DDevice->SetRenderTarget (0, savedrendertarget);
	savedrendertarget->Release();

	// Now read it back and see where it skips an entry. If the read-back
	// itself fails, report 256 ("no off-by-one found") rather than leaving
	// the result dependent on whatever a previous loop left in i.
	i = 256;
	if (SUCCEEDED(D3DDevice->GetRenderTargetData (testsurf, readsurf)) &&
		SUCCEEDED(readsurf->LockRect (&lockrect, &testrect, D3DLOCK_READONLY)))
	{
		if (PalFormat == D3DFMT_A8R8G8B8 || PalFormat == D3DFMT_X8R8G8B8)
		{
			const BYTE *pix = (const BYTE *)lockrect.pBits;
			for (i = 0; i < 256; ++i, pix += 4)
			{
				c = (pix[0] >> 6) |					// blue
					((pix[1] >> 5) << 2) |			// green
					((pix[2] >> 5) << 5);			// red
				if (c != i)
				{
					break;
				}
			}
		}
		else if (PalFormat == D3DFMT_A1R5G5B5 || PalFormat == D3DFMT_X1R5G5B5)
		{
			const WORD *pix = (const WORD *)lockrect.pBits;
			for (i = 0; i < 256; ++i, ++pix)
			{
				c = ((*pix & 0x0018) >> 3) |		// blue
					((*pix & 0x0380) >> 5) |		// green
					((*pix & 0x7C00) >> 7) ;		// red
				if (c != i)
				{
					break;
				}
			}
		}
		else if (PalFormat == D3DFMT_R5G6B5)
		{
			const WORD *pix = (const WORD *)lockrect.pBits;
			for (i = 0; i < 256; ++i, ++pix)
			{
				c = ((*pix & 0x0018) >> 3) |		// blue
					((*pix & 0x0700) >> 6) |		// green
					((*pix & 0xE000) >> 8) ;		// red
				if (c != i)
				{
					break;
				}
			}
		}
		else
		{
			// Huh? What kind of backbuffer is this?
			i = 256;
		}
		// Only unlock a surface that was actually locked.
		readsurf->UnlockRect();
	}
	readsurf->Release();
	testsurf->Release();

	OffByOneAt = i;
	if (i < 256)
	{
		// An entry was skipped, so switch the palette sampler's U
		// addressing to border mode.
		D3DDevice->SetSamplerState (1, D3DSAMP_ADDRESSU, D3DTADDRESS_BORDER);
	}
}
bool D3DFB::CreateFBTexture () bool D3DFB::CreateFBTexture ()
{ {
if (FAILED(D3DDevice->CreateTexture (Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) if (FAILED(D3DDevice->CreateTexture (Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL)))
@ -443,7 +612,7 @@ bool D3DFB::CreatePaletteTexture ()
bool D3DFB::CreateVertexes () bool D3DFB::CreateVertexes ()
{ {
float top = (TrueHeight - Height) * 0.5f - 0.5f; float top = (TrueHeight - Height) * 0.5f - 0.5f;
float right = float(Width) + 0.5f; float right = float(Width) - 0.5f;
float bot = float(Height) + top + 1.f; float bot = float(Height) + top + 1.f;
float texright = float(Width) / float(FBWidth); float texright = float(Width) / float(FBWidth);
float texbot = float(Height) / float(FBHeight); float texbot = float(Height) / float(FBHeight);
@ -579,6 +748,7 @@ void D3DFB::Update ()
bool D3DFB::PaintToWindow () bool D3DFB::PaintToWindow ()
{ {
RECT texrect = { 0, 0, Width, Height };
D3DLOCKED_RECT lockrect; D3DLOCKED_RECT lockrect;
HRESULT hr; HRESULT hr;
@ -595,7 +765,8 @@ bool D3DFB::PaintToWindow ()
return false; return false;
} }
} }
if (SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) if ((FBWidth == Width && FBHeight == Height && SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) ||
SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0)))
{ {
if (lockrect.Pitch == Pitch) if (lockrect.Pitch == Pitch)
{ {
@ -636,34 +807,61 @@ bool D3DFB::PaintToWindow ()
void D3DFB::UploadPalette () void D3DFB::UploadPalette ()
{ {
D3DLOCKED_RECT lockrect; D3DLOCKED_RECT lockrect;
int i;
if (OffByOneAt < 0)
{
DoOffByOneCheck ();
}
if (SUCCEEDED(PaletteTexture->LockRect (0, &lockrect, NULL, 0))) if (SUCCEEDED(PaletteTexture->LockRect (0, &lockrect, NULL, 0)))
{ {
NeedPalUpdate = false; // Keep trying to update the palette if we haven't done the off-by-one
// check yet. Otherwise, wait until the next time the palette changes.
NeedPalUpdate = (OffByOneAt < 0);
if (PalFormat == D3DFMT_A8R8G8B8) if (PalFormat == D3DFMT_A8R8G8B8)
{ {
BYTE *pix = (BYTE *)lockrect.pBits; BYTE *pix = (BYTE *)lockrect.pBits;
for (int i = 0; i < 256; ++i, pix += 4) for (i = 0; i < OffByOneAt; ++i, pix += 4)
{ {
pix[0] = GammaTable[SourcePalette[i].b]; pix[0] = GammaTable[SourcePalette[i].b];
pix[1] = GammaTable[SourcePalette[i].g]; pix[1] = GammaTable[SourcePalette[i].g];
pix[2] = GammaTable[SourcePalette[i].r]; pix[2] = GammaTable[SourcePalette[i].r];
pix[3] = 255; pix[3] = 255;
} }
for (; i < 256; ++i, pix += 4)
{
pix[0] = GammaTable[SourcePalette[i-1].b];
pix[1] = GammaTable[SourcePalette[i-1].g];
pix[2] = GammaTable[SourcePalette[i-1].r];
pix[3] = 255;
}
} }
else else
{ {
WORD *pix = (WORD *)lockrect.pBits; WORD *pix = (WORD *)lockrect.pBits;
for (int i = 0; i < 256; ++i, ++pix) for (i = 0; i < OffByOneAt; ++i, ++pix)
{ {
*pix = ((GammaTable[SourcePalette[i].r] >> 3) << 11) | *pix = ((GammaTable[SourcePalette[i].r] >> 3) << 11) |
((GammaTable[SourcePalette[i].g] >> 2) << 5) | ((GammaTable[SourcePalette[i].g] >> 2) << 5) |
(GammaTable[SourcePalette[i].b] >> 3); (GammaTable[SourcePalette[i].b] >> 3);
} }
for (; i < 256; ++i, ++pix)
{
*pix = ((GammaTable[SourcePalette[i-1].r] >> 3) << 11) |
((GammaTable[SourcePalette[i-1].g] >> 2) << 5) |
(GammaTable[SourcePalette[i-1].b] >> 3);
}
} }
PaletteTexture->UnlockRect (0); PaletteTexture->UnlockRect (0);
} }
if (OffByOneAt < 256)
{
D3DDevice->SetSamplerState (1, D3DSAMP_BORDERCOLOR,
D3DCOLOR_XRGB(GammaTable[SourcePalette[255].r],
GammaTable[SourcePalette[255].g],
GammaTable[SourcePalette[255].b]));
}
} }
PalEntry *D3DFB::GetPalette () PalEntry *D3DFB::GetPalette ()

View file

@ -238,6 +238,7 @@ private:
bool CreateFBTexture(); bool CreateFBTexture();
bool CreatePaletteTexture(); bool CreatePaletteTexture();
bool CreateVertexes(); bool CreateVertexes();
void DoOffByOneCheck();
void UploadPalette(); void UploadPalette();
void FillPresentParameters (D3DPRESENT_PARAMETERS *pp, bool fullscreen, bool vsync); void FillPresentParameters (D3DPRESENT_PARAMETERS *pp, bool fullscreen, bool vsync);
bool Reset(); bool Reset();
@ -255,6 +256,7 @@ private:
D3DFORMAT FBFormat; D3DFORMAT FBFormat;
D3DFORMAT PalFormat; D3DFORMAT PalFormat;
int FBWidth, FBHeight; int FBWidth, FBHeight;
int OffByOneAt;
bool VSync; bool VSync;
IDirect3DDevice9 *D3DDevice; IDirect3DDevice9 *D3DDevice;