diff --git a/docs/rh-log.txt b/docs/rh-log.txt index e54bc5d90..c77937ff6 100644 --- a/docs/rh-log.txt +++ b/docs/rh-log.txt @@ -1,3 +1,22 @@ +November 30, 2006 +- The DSimpleCanvas constructor now fills MemBuffer with zeros. +- Fixed: If the FBTexture wasn't exactly the same size as the screen, + D3DFB::PaintToWindow() would still lock it with D3DLOCK_DISCARD. Alas, + I saw no speedup for using a dirty region. (Side note: The Radeons are + apparently slower compared to DirectDraw because they must do + power-of-2 textures. If they ever add non-power-of-2 support like nvidia, + I assume they will also see a speed gain.) +- Changed fb_d3d9.cpp so that instead of trying to compensate for Geforce + off-by-one errors in the pixel shader, it automatically detects where + the error occurs and modifies the way the palette is uploaded to + compensate. Palette color 255 is then represented using the texture + border color instead of actually being part of the palette. This should + work correctly with all cards, since I had a report of an FX where the + off-by-one occurred in a different spot from the place where I observed + it on a 6 and 7 series cards. Since the shader now has one fewer + instruction, I notice a very marginal speedup. (Interestingly, removing + the flash blending from the shader had no perceivable performance gain.) + November 29, 2006 (Changes by Graf Zahl) - Fixed: The DECORATE expression evaluator evaluated operators of same precedence right to left instead of left to right. @@ -21,6 +40,9 @@ November 28, 2006 (Changes by Graf Zahl) November 28, 2006 - Started adding action function declarations to objects. - Added integer constant declarations to objects. +- Added some new token-based functions to sc_man.cpp that know about keywords + and record proper type information, so parsers don't need to treat + everything as strings. - Added a simple symbol table to PClass. 
November 27, 2006 (Changes by Graf Zahl) diff --git a/src/thingdef.cpp b/src/thingdef.cpp index 04277e310..f5e2c381f 100644 --- a/src/thingdef.cpp +++ b/src/thingdef.cpp @@ -2598,88 +2598,90 @@ static void ActorActionDef (AActor *defaults, Baggage &bag) SC_MustGetToken(TK_Identifier); funcname = sc_Name; SC_MustGetToken('('); - while (sc_TokenType != ')') + if (!SC_CheckToken(')')) { - int flags = 0; - char type = '@'; - - // Retrieve flags before type name - for (;;) + while (sc_TokenType != ')') { - if (SC_CheckToken(TK_Optional)) + int flags = 0; + char type = '@'; + + // Retrieve flags before type name + for (;;) { - flags |= OPTIONAL; + if (SC_CheckToken(TK_Optional)) + { + flags |= OPTIONAL; + } + else if (SC_CheckToken(TK_Eval)) + { + flags |= EVAL; + } + else if (SC_CheckToken(TK_EvalNot)) + { + flags |= EVALNOT; + } + else if (SC_CheckToken(TK_Coerce) || SC_CheckToken(TK_Native)) + { + } + else + { + break; + } } - else if (SC_CheckToken(TK_Eval)) - { - flags |= EVAL; - } - else if (SC_CheckToken(TK_EvalNot)) - { - flags |= EVALNOT; - } - else if (SC_CheckToken(TK_Coerce) || SC_CheckToken(TK_Native)) - { - } - else + switch (sc_TokenType) { + case TK_Bool: type = 'i'; break; + case TK_Int: type = 'i'; break; + case TK_Float: type = 'f'; break; + case TK_Sound: type = 's'; break; + case TK_String: type = 't'; break; + case TK_Name: type = 't'; break; + case TK_State: type = 'l'; break; + case TK_Color: type = 'c'; break; + case TK_Class: + SC_MustGetToken('<'); + SC_MustGetToken(TK_Identifier); + if (sc_Name != NAME_Actor) + { + SC_ScriptError ("Sorry, you can only use class"); + } + SC_MustGetToken('>'); + type = 'm'; + break; + case TK_Ellipsis: + type = '+'; + SC_MustGetToken(')'); + SC_UnGet(); + break; + default: + SC_ScriptError ("Unknown variable type %s", SC_TokenName(sc_TokenType, sc_String).GetChars()); break; } - } - if (flags != 0) - { - SC_MustGetAnyToken(); - } - switch (sc_TokenType) - { - case TK_Int: type = 'i'; break; - case TK_Float: type 
= 'f'; break; - case TK_Sound: type = 's'; break; - case TK_String: type = 't'; break; - case TK_State: type = 'l'; break; - case TK_Color: type = 'c'; break; - case TK_Class: - SC_MustGetToken('<'); - SC_MustGetToken(TK_Identifier); - if (sc_Name != NAME_Actor) + if (flags & EVALNOT) { - SC_ScriptError ("Sorry, you can only use class"); + type = 'y'; + } + else if (flags & EVAL) + { + type = 'x'; + } + if (!(flags & OPTIONAL)) + { + type -= 'a' - 'A'; + break; + } + #undef OPTIONAL + #undef EVAL + #undef EVALNOT + args += type; + SC_MustGetAnyToken(); + if (sc_TokenType != ',' && sc_TokenType != ')') + { + SC_ScriptError ("Expected ',' or ')' but got %s instead", SC_TokenName(sc_TokenType, sc_String).GetChars()); } - SC_MustGetToken('>'); - type = 'm'; - break; - case TK_Ellipsis: - type = '+'; - SC_MustGetToken(')'); - SC_UnGet(); - break; - default: - SC_ScriptError ("Unknown variable type %s", SC_TokenName(sc_TokenType, sc_String).GetChars()); - break; - } - if (flags & EVALNOT) - { - type = 'y'; - } - else if (flags & EVAL) - { - type = 'x'; - } - if (!(flags & OPTIONAL)) - { - type -= 'a' - 'A'; - break; - } -#undef OPTIONAL -#undef EVAL -#undef EVALNOT - args += type; - SC_MustGetAnyToken(); - if (sc_TokenType != ',' && sc_TokenType != ')') - { - SC_ScriptError ("Expected ',' or ')' but got %s instead", SC_TokenName(sc_TokenType, sc_String).GetChars()); } } + SC_MustGetToken(';'); PSymbolActionFunction *sym = new PSymbolActionFunction; sym->SymbolName = funcname; sym->SymbolType = SYM_ActionFunction; diff --git a/src/v_video.cpp b/src/v_video.cpp index 486677f0c..e1ee86953 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -595,6 +595,7 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) } } MemBuffer = new BYTE[Pitch * height]; + memset (MemBuffer, 0, Pitch * height); } DSimpleCanvas::~DSimpleCanvas () diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 5bce12564..deb10acd3 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp 
@@ -116,20 +116,12 @@ float4 InvFlash : register(c1); float4 main (float2 texCoord : TEXCOORD0) : COLOR { - half4 index = tex2D (Image, texCoord); - - // For some reason, this adjustment is needed on NVidia hardware. - // If this is not done, then all palette index >=240 look up - // palette index + 1. ATI behaves as expacted and does not need - // this adjustment. Fortunately, this produces correct results - // on both hardware with no perceptible performance impact, so - // I only need to use one shader. - index.x = clamp(index.x - 7.65931418e-6, 0.0, 1.0); - + float4 index = tex2D (Image, texCoord); float4 rgb = tex2D (Palette, index); return Flash + rgb * InvFlash; } - +#endif +#if 0 // // Generated by Microsoft (R) D3DX9 Shader Compiler 9.15.779.0000 // @@ -155,14 +147,12 @@ float4 main (float2 texCoord : TEXCOORD0) : COLOR // ps_1_4 - def c2, -7.65931418e-006, 0, 0, 0 texld r0, t0 - add_sat r0.x, r0.x, c2.x phase texld r1, r0 mad r0, r1, c1, c0 -// approximately 4 instruction slots used (2 texture, 2 arithmetic) +// approximately 3 instruction slots used (2 texture, 1 arithmetic) #endif const DWORD PalTexShaderDef[] = @@ -177,10 +167,9 @@ const DWORD PalTexShaderDef[] = 0x46766e49, 0x6873616c, 0x6c615000, 0x65747465, 0x5f737000, 0x00345f31, 0x7263694d, 0x666f736f, 0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x43207265, 0x69706d6f, 0x2072656c, 0x35312e39, 0x3937372e, 0x3030302e, - 0xabab0030, 0x00000051, 0xa00f0002, 0xb7008081, 0x00000000, 0x00000000, - 0x00000000, 0x00000042, 0x800f0000, 0xb0e40000, 0x00000002, 0x80110000, - 0x80000000, 0xa0000002, 0x0000fffd, 0x00000042, 0x800f0001, 0x80e40000, - 0x00000004, 0x800f0000, 0x80e40001, 0xa0e40001, 0xa0e40000, 0x0000ffff + 0xabab0030, 0x00000042, 0x800f0000, 0xb0e40000, 0x0000fffd, 0x00000042, + 0x800f0001, 0x80e40000, 0x00000004, 0x800f0000, 0x80e40001, 0xa0e40001, + 0xa0e40000, 0x0000ffff }; // PUBLIC DATA DEFINITIONS ------------------------------------------------- @@ -201,6 +190,7 @@ D3DFB::D3DFB (int 
width, int height, bool fullscreen) FBFormat = D3DFMT_UNKNOWN; PalFormat = D3DFMT_UNKNOWN; VSync = vid_vsync; + OffByOneAt = -1; Gamma = 1.0; memset (FlashConstants, 0, sizeof(FlashConstants)); @@ -258,10 +248,6 @@ D3DFB::D3DFB (int width, int height, bool fullscreen) if (D3DDevice != NULL) { CreateResources (); - D3DDevice->Clear (0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0,0,0), 1.f, 0); - D3DDevice->BeginScene(); - D3DDevice->EndScene(); - D3DDevice->Present(NULL, NULL, NULL, NULL); } } @@ -270,12 +256,7 @@ D3DFB::~D3DFB () ReleaseResources (); if (D3DDevice != NULL) { - // Do not release the D3DDevice in fullscreen mode. - D3DPRESENT_PARAMETERS d3dpp; - FillPresentParameters (&d3dpp, false, true); - //D3DDevice->Reset (&d3dpp); D3DDevice->Release(); - //Sleep (1000); } } @@ -393,6 +374,194 @@ bool D3DFB::Reset () return true; } +//========================================================================== +// +// DoOffByOneCheck +// +// NVidia hardware has an off-by-one error in the pixel shader. +// On a Geforce 7950GT and a 6200, I have witnessed it skip palette entry +// 240. I have a report that an FX card skips in a totally different spot. +// So rather than try and detect it in the shader, we do it here and +// compensate when uploading the palette and when drawing by setting the +// sampler mode for the palette to border and making the border color the +// final color in the palette. +// +// Interestingly, a Radeon x300 doesn't have this problem. I am curious +// if other ATI hardware is the same. 
+// +//========================================================================== + +void D3DFB::DoOffByOneCheck () +{ + IDirect3DSurface9 *savedrendertarget; + IDirect3DSurface9 *testsurf, *readsurf; + D3DSURFACE_DESC desc; + D3DLOCKED_RECT lockrect; + RECT testrect = { 0, 0, 256, 1 }; + float texright = 256.f / float(FBWidth); + float texbot = 1.f / float(FBHeight); + FBVERTEX verts[4] = + { + { -0.5f, -0.5f, 0.5f, 1.f, 0.f, 0.f }, + { 255.5f, -0.5f, 0.5f, 1.f, texright, 0.f }, + { 255.5f, 0.5f, 0.5f, 1.f, texright, texbot }, + { -0.5f, 0.5f, 0.5f, 1.f, 0.f, texbot } + }; + float flash[2][4] = + { + { 0.f, 0.f, 0.f, 0.f }, + { 1.f, 1.f, 1.f, 1.f } + }; + + union + { + BYTE Pal32[256][4]; + WORD Pal16[256]; + }; + int i, c; + + if (OffByOneAt >= 0) + { + return; + } + + // Create an easily recognizable R3G3B2 palette. + if (PalFormat == D3DFMT_A8R8G8B8) + { + for (i = 0; i < 256; ++i) + { + Pal32[i][0] = (i & 0x03) << 6; // blue + Pal32[i][1] = (i & 0x1C) << 3; // green + Pal32[i][2] = (i & 0xE0); // red; + Pal32[i][3] = 255; + } + } + else + { + for (i = 0; i < 256; ++i) + { + Pal16[i] = ((i & 0xE0) << 8) | // red + ((i & 0x1C) << 6) | // green + ((i & 0x03) << 3); // blue + } + } + // Upload the palette + if (SUCCEEDED(PaletteTexture->LockRect (0, &lockrect, NULL, 0))) + { + memcpy (lockrect.pBits, Pal32, 256 * ((PalFormat == D3DFMT_A8R8G8B8) ? 4 : 2)); + PaletteTexture->UnlockRect (0); + } + else + { + return; + } + // Prepare a texture with values 0-255. + if (SUCCEEDED(FBTexture->LockRect (0, &lockrect, &testrect, 0))) + { + for (i = 0; i < 256; ++i) + { + ((BYTE *)lockrect.pBits)[i] = i; + } + FBTexture->UnlockRect (0); + } + else + { + return; + } + // Create a render target that we can draw it to. 
+ if (FAILED(D3DDevice->GetRenderTarget (0, &savedrendertarget))) + { + return; + } + if (FAILED(D3DDevice->CreateRenderTarget (256, 1, PalFormat, D3DMULTISAMPLE_NONE, 0, FALSE, &testsurf, NULL))) + { + return; + } + if (FAILED(D3DDevice->CreateOffscreenPlainSurface (256, 1, PalFormat, D3DPOOL_SYSTEMMEM, &readsurf, NULL))) + { + testsurf->Release(); + return; + } + if (FAILED(D3DDevice->SetRenderTarget (0, testsurf))) + { + testsurf->Release(); + readsurf->Release(); + return; + } + // Write it to the render target using the pixel shader. + D3DDevice->BeginScene(); + D3DDevice->SetTexture (0, FBTexture); + D3DDevice->SetTexture (1, PaletteTexture); + D3DDevice->SetFVF (D3DFVF_FBVERTEX); + D3DDevice->SetPixelShader (PalTexShader); + D3DDevice->SetPixelShaderConstantF (0, flash[0], 2); + D3DDevice->DrawPrimitiveUP (D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); + D3DDevice->EndScene(); + D3DDevice->SetRenderTarget (0, savedrendertarget); + savedrendertarget->Release(); + // Now read it back and see where it skips an entry + if (SUCCEEDED(D3DDevice->GetRenderTargetData (testsurf, readsurf)) && + SUCCEEDED(readsurf->LockRect (&lockrect, &testrect, D3DLOCK_READONLY))) + { + desc.Format = PalFormat; + if (desc.Format == D3DFMT_A8R8G8B8 || desc.Format == D3DFMT_X8R8G8B8) + { + const BYTE *pix = (const BYTE *)lockrect.pBits; + for (i = 0; i < 256; ++i, pix += 4) + { + c = (pix[0] >> 6) | // blue + ((pix[1] >> 5) << 2) | // green + ((pix[2] >> 5) << 5); // red + if (c != i) + { + break; + } + } + } + else if (desc.Format == D3DFMT_A1R5G5B5 || desc.Format == D3DFMT_X1R5G5B5) + { + const WORD *pix = (const WORD *)lockrect.pBits; + for (i = 0; i < 256; ++i, ++pix) + { + c = ((*pix & 0x0018) >> 3) | // blue + ((*pix & 0x0380) >> 5) | // green + ((*pix & 0x7C00) >> 7) ; // red + if (c != i) + { + break; + } + } + } + else if (desc.Format == D3DFMT_R5G6B5) + { + const WORD *pix = (const WORD *)lockrect.pBits; + for (i = 0; i < 256; ++i, ++pix) + { + c = ((*pix & 0x0018) >> 3) | 
// blue + ((*pix & 0x0700) >> 6) | // green + ((*pix & 0xE000) >> 8) ; // red + if (c != i) + { + break; + } + } + } + else + { + // Huh? What kind of backbuffer is this? + i = 256; + } + } + readsurf->UnlockRect(); + readsurf->Release(); + testsurf->Release(); + OffByOneAt = i; + if (i < 256) + { + D3DDevice->SetSamplerState (1, D3DSAMP_ADDRESSU, D3DTADDRESS_BORDER); + } +} + bool D3DFB::CreateFBTexture () { if (FAILED(D3DDevice->CreateTexture (Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) @@ -443,7 +612,7 @@ bool D3DFB::CreatePaletteTexture () bool D3DFB::CreateVertexes () { float top = (TrueHeight - Height) * 0.5f - 0.5f; - float right = float(Width) + 0.5f; + float right = float(Width) - 0.5f; float bot = float(Height) + top + 1.f; float texright = float(Width) / float(FBWidth); float texbot = float(Height) / float(FBHeight); @@ -579,6 +748,7 @@ void D3DFB::Update () bool D3DFB::PaintToWindow () { + RECT texrect = { 0, 0, Width, Height }; D3DLOCKED_RECT lockrect; HRESULT hr; @@ -595,7 +765,8 @@ bool D3DFB::PaintToWindow () return false; } } - if (SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) + if ((FBWidth == Width && FBHeight == Height && SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || + SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { if (lockrect.Pitch == Pitch) { @@ -636,34 +807,61 @@ bool D3DFB::PaintToWindow () void D3DFB::UploadPalette () { D3DLOCKED_RECT lockrect; + int i; + if (OffByOneAt < 0) + { + DoOffByOneCheck (); + } if (SUCCEEDED(PaletteTexture->LockRect (0, &lockrect, NULL, 0))) { - NeedPalUpdate = false; + // Keep trying to update the palette if we haven't done the off-by-one + // check yet. Otherwise, wait until the next time the palette changes. 
+ NeedPalUpdate = (OffByOneAt < 0); if (PalFormat == D3DFMT_A8R8G8B8) { BYTE *pix = (BYTE *)lockrect.pBits; - for (int i = 0; i < 256; ++i, pix += 4) + for (i = 0; i < OffByOneAt; ++i, pix += 4) { pix[0] = GammaTable[SourcePalette[i].b]; pix[1] = GammaTable[SourcePalette[i].g]; pix[2] = GammaTable[SourcePalette[i].r]; pix[3] = 255; } + for (; i < 256; ++i, pix += 4) + { + pix[0] = GammaTable[SourcePalette[i-1].b]; + pix[1] = GammaTable[SourcePalette[i-1].g]; + pix[2] = GammaTable[SourcePalette[i-1].r]; + pix[3] = 255; + } } else { WORD *pix = (WORD *)lockrect.pBits; - for (int i = 0; i < 256; ++i, ++pix) + for (i = 0; i < OffByOneAt; ++i, ++pix) { *pix = ((GammaTable[SourcePalette[i].r] >> 3) << 11) | ((GammaTable[SourcePalette[i].g] >> 2) << 5) | (GammaTable[SourcePalette[i].b] >> 3); } + for (; i < 256; ++i, ++pix) + { + *pix = ((GammaTable[SourcePalette[i-1].r] >> 3) << 11) | + ((GammaTable[SourcePalette[i-1].g] >> 2) << 5) | + (GammaTable[SourcePalette[i-1].b] >> 3); + } } PaletteTexture->UnlockRect (0); } + if (OffByOneAt < 256) + { + D3DDevice->SetSamplerState (1, D3DSAMP_BORDERCOLOR, + D3DCOLOR_XRGB(GammaTable[SourcePalette[255].r], + GammaTable[SourcePalette[255].g], + GammaTable[SourcePalette[255].b])); + } } PalEntry *D3DFB::GetPalette () diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 0c11600bf..5508c4d62 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -238,6 +238,7 @@ private: bool CreateFBTexture(); bool CreatePaletteTexture(); bool CreateVertexes(); + void DoOffByOneCheck(); void UploadPalette(); void FillPresentParameters (D3DPRESENT_PARAMETERS *pp, bool fullscreen, bool vsync); bool Reset(); @@ -255,6 +256,7 @@ private: D3DFORMAT FBFormat; D3DFORMAT PalFormat; int FBWidth, FBHeight; + int OffByOneAt; bool VSync; IDirect3DDevice9 *D3DDevice;