gzdoom-gles/src/win32/i_system.h

259 lines
5.9 KiB
C
Raw Normal View History

// Emacs style mode select -*- C++ -*-
//-----------------------------------------------------------------------------
//
// $Id:$
//
// Copyright (C) 1993-1996 by id Software, Inc.
//
// This source is available for distribution and/or modification
// only under the terms of the DOOM Source Code License as
// published by id Software. All rights reserved.
//
// The source is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License
// for more details.
//
// DESCRIPTION:
// System specific interface stuff.
//
//-----------------------------------------------------------------------------
#ifndef __I_SYSTEM__
#define __I_SYSTEM__
#include "d_main.h"
#include "d_ticcmd.h"
#include "d_event.h"
// Index values into the LanguageIDs array
enum
{
LANGIDX_UserPreferred,
LANGIDX_UserDefault,
LANGIDX_SysPreferred,
LANGIDX_SysDefault
};
extern uint32 LanguageIDs[4];
extern void SetLanguageIDs ();
// [RH] Detects the OS the game is running under.
void I_DetectOS (void);
typedef enum {
os_unknown,
os_Win95,
os_WinNT4,
os_Win2k
} os_t;
extern os_t OSPlatform;
struct CPUInfo // 92 bytes
{
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
char VendorID[16]; // 0
char CPUString[48]; // 16
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
BYTE Stepping; // 64
BYTE Model; // 65
BYTE Family; // 66
BYTE Type; // 67
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
BYTE BrandIndex; // 68
BYTE CLFlush; // 69
BYTE CPUCount; // 70
BYTE APICID; // 71
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD bSSE3:1; // 72
DWORD DontCare1:31;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD bFPU:1; // 76
DWORD bVME:1;
DWORD bDE:1;
DWORD bPSE:1;
DWORD bRDTSC:1;
DWORD bMSR:1;
DWORD bPAE:1;
DWORD bMCE:1;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD bCX8:1; // 77
DWORD bAPIC:1;
DWORD bReserved1:1;
DWORD bSEP:1;
DWORD bMTRR:1;
DWORD bPGE:1;
DWORD bMCA:1;
DWORD bCMOV:1;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD bPAT:1; // 78
DWORD bPSE36:1;
DWORD bPSN:1;
DWORD bCFLUSH:1;
DWORD bReserved2:1;
DWORD bDS:1;
DWORD bACPI:1;
DWORD bMMX:1;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD bFXSR:1; // 79
DWORD bSSE:1;
DWORD bSSE2:1;
DWORD bSS:1;
DWORD bHTT:1;
DWORD bTM:1;
DWORD bReserved3:1;
DWORD bPBE:1;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
DWORD DontCare2:22; // 80
DWORD bMMXPlus:1; // AMD's MMX extensions
DWORD bMMXAgain:1; // Just a copy of bMMX above
DWORD DontCare3:6;
DWORD b3DNowPlus:1;
DWORD b3DNow:1;
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
BYTE AMDStepping; // 84
BYTE AMDModel; // 85
BYTE AMDFamily; // 86
BYTE bIsAMD; // 87
- Ported vlinetallasm4 to AMD64 assembly. Even with the increased number of registers AMD64 provides, this routine still needs to be written as self- modifying code for maximum performance. The additional registers do allow for further optimization over the x86 version by allowing all four pixels to be in flight at the same time. The end result is that AMD64 ASM is about 2.18 times faster than AMD64 C and about 1.06 times faster than x86 ASM. (For further comparison, AMD64 C and x86 C are practically the same for this function.) Should I port any more assembly to AMD64, mvlineasm4 is the most likely candidate, but it's not used enough at this point to bother. Also, this may or may not work with Linux at the moment, since it doesn't have the eh_handler metadata. Win64 is easier, since I just need to structure the function prologue and epilogue properly and use some assembler directives/macros to automatically generate the metadata. And that brings up another point: You need YASM to assemble the AMD64 code, because NASM doesn't support the Win64 metadata directives. - Added an SSE version of DoBlending. This is strictly C intrinsics. VC++ still throws around unneccessary register moves. GCC seems to be pretty close to optimal, requiring only about 2 cycles/color. They're both faster than my hand-written MMX routine, so I don't need to feel bad about not hand-optimizing this for x64 builds. - Removed an extra instruction from DoBlending_MMX, transposed two instructions, and unrolled it once, shaving off about 80 cycles from the time required to blend 256 palette entries. Why? Because I tried writing a C version of the routine using compiler intrinsics and was appalled by all the extra movq's VC++ added to the code. GCC was better, but still generated extra instructions. I only wanted a C version because I can't use inline assembly with VC++'s x64 compiler, and x64 assembly is a bit of a pain. (It's a pain because Linux and Windows have different calling conventions, and you need to maintain extra metadata for functions.) So, the assembly version stays and the C version stays out. - Removed all the pixel doubling r_detail modes, since the one platform they were intended to assist (486) actually sees very little benefit from them. - Rewrote CheckMMX in C and renamed it to CheckCPU. - Fixed: CPUID function 0x80000005 is specified to return detailed L1 cache only for AMD processors, so we must not use it on other architectures, or we end up overwriting the L1 cache line size with 0 or some other number we don't actually understand. SVN r1134 (trunk)
2008-08-09 03:13:43 +00:00
BYTE DataL1LineSize; // 88
BYTE DataL1LinesPerTag; // 89
BYTE DataL1Associativity;//90
BYTE DataL1SizeKB; // 91
};
extern "C" {
extern CPUInfo CPU;
}
// Called by DoomMain.
void I_Init (void);
// Called by D_DoomLoop,
// returns current time in tics.
extern int (*I_GetTime) (bool saveMS);
// like I_GetTime, except it waits for a new tic before returning
extern int (*I_WaitForTic) (int);
int I_GetTimePolled (bool saveMS);
int I_GetTimeFake (void);
fixed_t I_GetTimeFrac (uint32 *ms);
//
// Called by D_DoomLoop,
// called before processing any tics in a frame
// (just after displaying a frame).
// Time consuming syncronous operations
// are performed here (joystick reading).
// Can call D_PostEvent.
//
void I_StartFrame (void);
//
// Called by D_DoomLoop,
// called before processing each tic in a frame.
// Quick syncronous operations are performed here.
// Can call D_PostEvent.
void I_StartTic (void);
// Asynchronous interrupt functions should maintain private queues
// that are read by the synchronous functions
// to be converted into events.
// Either returns a null ticcmd,
// or calls a loadable driver to build it.
// This ticcmd will then be modified by the gameloop
// for normal input.
ticcmd_t *I_BaseTiccmd (void);
// Called by M_Responder when quit is selected.
// Clean exit, displays sell blurb.
void I_Quit (void);
void I_Tactile (int on, int off, int total);
void STACK_ARGS I_Error (const char *error, ...) GCCPRINTF(1,2);
void STACK_ARGS I_FatalError (const char *error, ...) GCCPRINTF(1,2);
void atterm (void (*func)(void));
void popterm ();
// Repaint the pre-game console
void I_PaintConsole (void);
// Print a console string
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
void I_PrintStr (const char *cp);
// Set the title string of the startup window
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
struct IWADInfo;
void I_SetIWADInfo (const IWADInfo *title);
// Pick from multiple IWADs to use
int I_PickIWad (WadStuff *wads, int numwads, bool queryiwad, int defaultiwad);
// The ini could not be saved at exit
bool I_WriteIniFailed ();
// [RH] Returns millisecond-accurate time
unsigned int I_MSTime (void);
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
// [RH] Title banner to display during startup
extern const IWADInfo *DoomStartupInfo;
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
// [RH] Used by the display code to set the normal window procedure
void I_SetWndProc();
// [RH] Checks the registry for Steam's install path, so we can scan its
// directories for IWADs if the user purchased any through Steam.
FString I_GetSteamPath();
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
// Damn Microsoft for doing Get/SetWindowLongPtr half-assed. Instead of
// giving them proper prototypes under Win32, they are just macros for
// Get/SetWindowLong, meaning they take LONGs and not LONG_PTRs.
#ifdef _WIN64
typedef long long WLONG_PTR;
#elif _MSC_VER
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
typedef _W64 long WLONG_PTR;
#else
typedef long WLONG_PTR;
Note: I have not tried compiling these recent changes under Linux. I wouldn't be surprised if it doesn't work. - Reorganized the network startup loops so now they are event driven. There is a single function that gets called to drive it, and it uses callbacks to perform the different stages of the synchronization. This lets me have a nice, responsive abort button instead of the previous unannounced hit-escape-to- abort behavior, and I think the rearranged code is slightly easier to understand too. - Increased the number of bytes for version info during D_ArbitrateNetStart(), in preparation for the day when NETGAMEVERSION requires more than one byte. - I noticed an issue with Vista RC1 and the new fatal error setup. Even after releasing a DirectDraw or Direct3D interface, the DWM can still use the last image drawn using them when it composites the window. It doesn't always do it but it does often enough that it is a real problem. At this point, I don't know if it's a problem with the release version of Vista or not. After messing around, I discovered the problem was caused by ~Win32Video() hiding the window and then having it immediately shown soon after. The DWM kept an image of the window to do the transition effect with, and then when it didn't get a chance to do the transition, it didn't properly forget about its saved image and kept plastering it on top of everything else underneath. - Added a network synchronization panel to the window during netgame startup. - Fixed: PClass::CreateDerivedClass() must initialize StateList to NULL. Otherwise, classic DECORATE definitions generate a big, fat crash. - Resurrected the R_Init progress bar, now as a standard Windows control. - Removed the sound failure dialog. The FMOD setup already defaulted to no sound if initialization failed, so this only applies when snd_output is set to "alternate" which now also falls back to no sound. In addition, it wasn't working right, and I didn't feel like fixing it for the probably 0% of users it affected. - Fixed: The edit control used for logging output added text in reverse order on Win9x. - Went back to the roots and made graphics initialization one of the last things to happen during setup. Now the startup text is visible again. More importantly, the main window is no longer created invisible, which seems to cause trouble with it not always appearing in the taskbar. The fatal error dialog is now also embedded in the main window instead of being a separate modal dialog, so you can play with the log window to see any problems that might be reported there. Rather than completely restoring the original startup order, I tried to keep things as close to the way they were with early graphics startup. In particular, V_Init() now creates a dummy screen so that things that need screen dimensions can get them. It gets replaced by the real screen later in I_InitGraphics(). Will need to check this under Linux to make sure it didn't cause any problems there. - Removed the following stubs that just called functions in Video: - I_StartModeIterator() - I_NextMode() - I_DisplayType() I_FullscreenChanged() was also removed, and a new fullscreen parameter was added to IVideo::StartModeIterator(), since that's all it controlled. - Renamed I_InitHardware() back to I_InitGraphics(), since that's all it's initialized post-1.22. SVN r416 (trunk)
2006-12-19 04:09:10 +00:00
#endif
// Directory searching routines
// Mirror WIN32_FIND_DATAA in <winbase.h>
#ifndef MAX_PATH
#define MAX_PATH 260
#endif
#ifndef PATH_MAX
#define PATH_MAX 260
#endif
struct findstate_t
{
DWORD Attribs;
DWORD Times[3*2];
DWORD Size[2];
DWORD Reserved[2];
char Name[MAX_PATH];
char AltName[14];
};
void *I_FindFirst (const char *filespec, findstate_t *fileinfo);
int I_FindNext (void *handle, findstate_t *fileinfo);
int I_FindClose (void *handle);
#define I_FindName(a) ((a)->Name)
#define I_FindAttr(a) ((a)->Attribs)
#define FA_RDONLY 0x00000001
#define FA_HIDDEN 0x00000002
#define FA_SYSTEM 0x00000004
#define FA_DIREC 0x00000010
#define FA_ARCH 0x00000020
#endif