- Changed Linux to use clock_gettime for profiling instead of rdtsc. This
  avoids potential erroneous results on multicore and variable speed
  processors. Windows will need to be updated accordingly.



SVN r1142 (trunk)
This commit is contained in:
Randy Heit 2008-08-10 03:25:08 +00:00
parent fd2c0b82ef
commit ad96225213
21 changed files with 399 additions and 540 deletions

View file

@ -1,3 +1,8 @@
August 10, 2008
- Changed Linux to use clock_gettime for profiling instead of rdtsc. This
avoids potential erroneous results on multicore and variable speed
processors.
August 9, 2008 (Changes by Graf Zahl)
- Converted the last of Hexen's inventory items to DECORATE so that I could
export AInventory.

View file

@ -124,10 +124,6 @@ else( WIN32 )
endif( FPU_CONTROL_DIR )
endif( WIN32 )
if( X64 )
set( NO_ASM ON )
endif( X64 )
# Decide on the name of the FMOD library we want to use.
if( NOT FMOD_LIB_NAME AND MSVC )
@ -231,15 +227,16 @@ if( NOT NO_ASM )
set( FIXRTEXT fixrtext )
endif( WIN32 )
message( STATUS "Selected assembler: ${ASSEMBLER}" )
MACRO( ADD_ASM_FILE infile )
set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${infile}${ASM_OUTPUT_EXTENSION}" )
MACRO( ADD_ASM_FILE indir infile )
set( ASM_OUTPUT_${infile} "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}/${infile}${ASM_OUTPUT_EXTENSION}" )
if( WIN32 )
set( FIXRTEXT_${infile} COMMAND ${FIXRTEXT} "${ASM_OUTPUT_${infile}}" )
endif( WIN32 )
add_custom_command( OUTPUT ${ASM_OUTPUT_${infile}}
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -i${CMAKE_CURRENT_SOURCE_DIR}/ -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${infile}"
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/zdoom.dir/${indir}
COMMAND ${ASSEMBLER} ${ASM_FLAGS} -i${CMAKE_CURRENT_SOURCE_DIR}/ -o"${ASM_OUTPUT_${infile}}" "${CMAKE_CURRENT_SOURCE_DIR}/${indir}/${infile}.asm"
${FIXRTEXT_${infile}}
DEPENDS ${infile} ${FIXRTEXT} )
DEPENDS ${indir}/${infile}.asm ${FIXRTEXT} )
set( ASM_SOURCES ${ASM_SOURCES} "${ASM_OUTPUT_${infile}}" )
ENDMACRO( ADD_ASM_FILE )
endif( NOT NO_ASM )
@ -286,6 +283,19 @@ if( NOT MSVC )
add_definitions( -D__forceinline=inline )
endif( NOT MSVC )
# Find out where clock_gettime lives on Unix-like systems. If librt exports it,
# rt is appended to ZDOOM_LIBS. Otherwise the default libraries are probed, and
# when the function cannot be found at all NO_CLOCK_GETTIME is defined and a
# status message reports that timing statistics are unavailable.
if( UNIX )
	CHECK_LIBRARY_EXISTS( rt clock_gettime "" CLOCK_GETTIME_IN_RT )
	if( CLOCK_GETTIME_IN_RT )
		# librt provides the symbol.
		set( ZDOOM_LIBS ${ZDOOM_LIBS} rt )
	else( CLOCK_GETTIME_IN_RT )
		# Not in librt; it may still be available without an extra library.
		CHECK_FUNCTION_EXISTS( clock_gettime CLOCK_GETTIME_EXISTS )
		if( NOT CLOCK_GETTIME_EXISTS )
			message( STATUS "Could not find clock_gettime. Timing statistics will not be available." )
			add_definitions( -DNO_CLOCK_GETTIME )
		endif( NOT CLOCK_GETTIME_EXISTS )
	endif( CLOCK_GETTIME_IN_RT )
endif( UNIX )
# Update svnrevision.h
add_custom_target( revision_check ALL
@ -342,13 +352,13 @@ endif( WIN32 )
if( NOT NO_ASM )
if( X64 )
ADD_ASM_FILE( asm_x86_64/tmap3.asm )
ADD_ASM_FILE( asm_x86_64 tmap3 )
else( X64 )
ADD_ASM_FILE( asm_ia32/a.asm )
ADD_ASM_FILE( asm_ia32/misc.asm )
ADD_ASM_FILE( asm_ia32/tmap.asm )
ADD_ASM_FILE( asm_ia32/tmap2.asm )
ADD_ASM_FILE( asm_ia32/tmap3.asm )
ADD_ASM_FILE( asm_ia32 a )
ADD_ASM_FILE( asm_ia32 misc )
ADD_ASM_FILE( asm_ia32 tmap )
ADD_ASM_FILE( asm_ia32 tmap2 )
ADD_ASM_FILE( asm_ia32 tmap3 )
endif( X64 )
if( WIN32 )
if( NOT X64 )

View file

@ -56,7 +56,8 @@ Everything that is changed is marked (maybe commented) with "Added by MC"
#include "m_misc.h"
#include "sbar.h"
#include "p_acs.h"
#include "teaminfo.h"
#include "teaminfo.h"
#include "i_system.h"
static FRandom pr_botspawn ("BotSpawn");
@ -65,7 +66,8 @@ void InitBotStuff();
//Externs
FCajunMaster bglobal;
cycle_t BotThinkCycles, BotSupportCycles, BotWTG;
cycle_t BotThinkCycles, BotSupportCycles;
int BotWTG;
static const char *BotConfigStrings[] =
{
@ -101,7 +103,7 @@ void FCajunMaster::Main (int buf)
{
int i;
BotThinkCycles = 0;
BotThinkCycles.Reset();
if (consoleplayer != Net_Arbitrator || demoplayback)
return;
@ -114,13 +116,13 @@ void FCajunMaster::Main (int buf)
//Think for bots.
if (botnum)
{
clock (BotThinkCycles);
BotThinkCycles.Clock();
for (i = 0; i < MAXPLAYERS; i++)
{
if (playeringame[i] && players[i].mo && !freeze && players[i].isbot)
Think (players[i].mo, &netcmds[i][buf]);
}
unclock (BotThinkCycles);
BotThinkCycles.Unclock();
}
//Add new bots?
@ -645,9 +647,8 @@ bool FCajunMaster::LoadBots ()
ADD_STAT (bots)
{
FString out;
out.Format ("think = %04.1f ms support = %04.1f ms wtg = %llu",
(double)BotThinkCycles * 1000 * SecondsPerCycle,
(double)BotSupportCycles * 1000 * SecondsPerCycle,
out.Format ("think = %04.1f ms support = %04.1f ms wtg = %d",
BotThinkCycles.TimeMS(), BotSupportCycles.TimeMS(),
BotWTG);
return out;
}

View file

@ -479,8 +479,10 @@ void D_Display ()
if (nodrawers)
return; // for comparative timing / profiling
cycle_t cycles = 0;
clock (cycles);
cycle_t cycles;
cycles.Reset();
cycles.Clock();
if (players[consoleplayer].camera == NULL)
{
@ -725,7 +727,7 @@ void D_Display ()
Net_WriteByte (DEM_WIPEOFF);
}
unclock (cycles);
cycles.Unclock();
FrameCycles = cycles;
}
@ -2635,11 +2637,7 @@ ADD_STAT (fps)
{
FString out;
out.Format("frame=%04.1f ms walls=%04.1f ms planes=%04.1f ms masked=%04.1f ms",
(double)FrameCycles * SecondsPerCycle * 1000,
(double)WallCycles * SecondsPerCycle * 1000,
(double)PlaneCycles * SecondsPerCycle * 1000,
(double)MaskedCycles * SecondsPerCycle * 1000
);
FrameCycles.TimeMS(), WallCycles.TimeMS(), PlaneCycles.TimeMS(), MaskedCycles.TimeMS());
return out;
}
@ -2651,14 +2649,15 @@ ADD_STAT (fps)
//
//==========================================================================
static cycle_t bestwallcycles = INT_MAX;
static double bestwallcycles = HUGE_VAL;
ADD_STAT (wallcycles)
{
FString out;
if (WallCycles && WallCycles < bestwallcycles)
bestwallcycles = WallCycles;
out.Format ("%llu", bestwallcycles);
FString out;
double cycles = WallCycles.Time();
if (cycles && cycles < bestwallcycles)
bestwallcycles = cycles;
out.Format ("%g", bestwallcycles);
return out;
}
@ -2672,24 +2671,25 @@ ADD_STAT (wallcycles)
CCMD (clearwallcycles)
{
bestwallcycles = INT_MAX;
bestwallcycles = HUGE_VAL;
}
#if 1
// To use these, also uncomment the clock/unclock in wallscan
static cycle_t bestscancycles = INT_MAX;
static double bestscancycles = HUGE_VAL;
ADD_STAT (scancycles)
{
FString out;
if (WallScanCycles && WallScanCycles < bestscancycles)
bestscancycles = WallScanCycles;
out.Format ("%llu", bestscancycles);
FString out;
double scancycles = WallScanCycles.Time();
if (scancycles && scancycles < bestscancycles)
bestscancycles = scancycles;
out.Format ("%g", bestscancycles);
return out;
}
CCMD (clearscancycles)
{
bestscancycles = INT_MAX;
bestscancycles = HUGE_VAL;
}
#endif

View file

@ -63,9 +63,6 @@ ClassReg DObject::RegistrationInfo =
};
_DECLARE_TI(DObject)
static cycle_t StaleCycles;
static int StaleCount;
FMetaTable::~FMetaTable ()
{
FreeMeta ();
@ -528,11 +525,4 @@ void DObject::CheckIfSerialized () const
StaticType()->TypeName.GetChars());
}
}
ADD_STAT (destroys)
{
FString out;
out.Format ("Pointer fixing: %d in %04.1f ms",
StaleCount, SecondsPerCycle * (double)StaleCycles * 1000);
return out;
}

View file

@ -406,9 +406,11 @@ void DThinker::RunThinkers ()
{
int i, count;
ThinkCycles = BotSupportCycles = BotWTG = 0;
// Clear the profiling accumulators before timing this pass. ThinkCycles and
// BotSupportCycles are cycle_t timers and are cleared with Reset(); BotWTG is
// a plain int counter, so it is zeroed by assignment.
ThinkCycles.Reset();
BotSupportCycles.Reset();
BotWTG = 0;
clock (ThinkCycles);
ThinkCycles.Clock();
// Tick every thinker left from last time
for (i = STAT_FIRST_THINKING; i <= MAX_STATNUM; ++i)
@ -426,7 +428,7 @@ void DThinker::RunThinkers ()
}
} while (count != 0);
unclock (ThinkCycles);
ThinkCycles.Unclock();
}
int DThinker::TickThinkers (FThinkerList *list, FThinkerList *dest)
@ -571,7 +573,6 @@ DThinker *FThinkerIterator::Next ()
ADD_STAT (think)
{
FString out;
out.Format ("Think time = %04.1f ms",
SecondsPerCycle * (double)ThinkCycles * 1000);
out.Format ("Think time = %04.1f ms", ThinkCycles.TimeMS());
return out;
}

View file

@ -76,7 +76,7 @@ static void PlayerLandedOnThing (AActor *mo, AActor *onmobj);
// EXTERNAL DATA DECLARATIONS ----------------------------------------------
extern cycle_t BotSupportCycles;
extern cycle_t BotWTG;
extern int BotWTG;
EXTERN_CVAR (Bool, r_drawfuzz);
EXTERN_CVAR (Int, cl_rockettrails)
@ -2646,7 +2646,7 @@ void AActor::Tick ()
if (bglobal.botnum && consoleplayer == Net_Arbitrator && !demoplayback &&
((flags & (MF_SPECIAL|MF_MISSILE)) || (flags3 & MF3_ISMONSTER)))
{
clock (BotSupportCycles);
BotSupportCycles.Clock();
bglobal.m_Thinking = true;
for (i = 0; i < MAXPLAYERS; i++)
{
@ -2681,7 +2681,7 @@ void AActor::Tick ()
}
}
bglobal.m_Thinking = false;
unclock (BotSupportCycles);
BotSupportCycles.Unclock();
}
//End of MC

View file

@ -27,14 +27,15 @@
#ifdef _MSC_VER
#include <malloc.h> // for alloca()
#endif
#include "templates.h"
#include "m_alloc.h"
#include "m_argv.h"
#include "m_swap.h"
#include "m_bbox.h"
#include "g_game.h"
#include "i_system.h"
#include "i_system.h"
#include "x86.h"
#include "w_wad.h"
#include "doomdef.h"
#include "p_local.h"
@ -2547,7 +2548,7 @@ line_t** linebuffer;
static void P_GroupLines (bool buildmap)
{
cycle_t times[16] = { 0 };
cycle_t times[16];
TArray<linf> exLightTags;
int* linesDoneInEachSector;
int i;
@ -2559,9 +2560,14 @@ static void P_GroupLines (bool buildmap)
FBoundingBox bbox;
bool flaggedNoFronts = false;
unsigned int ii, jj;
for (i = 0; i < (int)countof(times); ++i)
{
times[i].Reset();
}
// look up sector number for each subsector
clock (times[0]);
times[0].Clock();
for (i = 0; i < numsubsectors; i++)
{
subsectors[i].sector = segs[subsectors[i].firstline].sidedef->sector;
@ -2579,10 +2585,10 @@ static void P_GroupLines (bool buildmap)
subsectors[i].CenterX = fixed_t(accumx * 0.5 / subsectors[i].numlines);
subsectors[i].CenterY = fixed_t(accumy * 0.5 / subsectors[i].numlines);
}
unclock (times[0]);
times[0].Unclock();
// count number of lines in each sector
clock (times[1]);
times[1].Clock();
total = 0;
totallights = 0;
for (i = 0, li = lines; i < numlines; i++, li++)
@ -2635,10 +2641,10 @@ static void P_GroupLines (bool buildmap)
{
I_Error ("You need to fix these lines to play this map.\n");
}
unclock (times[1]);
times[1].Unclock();
// collect extra light info
clock (times[2]);
times[2].Clock();
LightStacks = new FLightStack[totallights];
ExtraLights = new FExtraLight[exLightTags.Size()];
memset (ExtraLights, 0, exLightTags.Size()*sizeof(FExtraLight));
@ -2650,10 +2656,10 @@ static void P_GroupLines (bool buildmap)
ExtraLights[ii].Lights = &LightStacks[jj];
jj += ExtraLights[ii].NumLights;
}
unclock (times[2]);
times[2].Unclock();
// build line tables for each sector
clock (times[3]);
times[3].Clock();
linebuffer = new line_t *[total];
line_t **lineb_p = linebuffer;
linesDoneInEachSector = new int[numsectors];
@ -2766,21 +2772,21 @@ static void P_GroupLines (bool buildmap)
#endif
}
delete[] linesDoneInEachSector;
unclock (times[3]);
times[3].Unclock();
// [RH] Moved this here
clock (times[4]);
times[4].Clock();
P_InitTagLists(); // killough 1/30/98: Create xref tables for tags
unclock (times[4]);
times[4].Unclock();
clock (times[5]);
times[5].Clock();
if (!buildmap)
{
P_SetSlopes ();
}
unclock (times[5]);
times[5].Unclock();
clock (times[6]);
times[6].Clock();
for (i = 0, li = lines; i < numlines; ++i, ++li)
{
if (li->special == ExtraFloor_LightOnly)
@ -2805,14 +2811,14 @@ static void P_GroupLines (bool buildmap)
}
}
}
unclock (times[6]);
times[6].Unclock();
if (showloadtimes)
{
Printf ("---Group Lines Times---\n");
for (i = 0; i < 7; ++i)
{
Printf (" time %d:%10llu\n", i, times[i]);
Printf (" time %d:%9.4f ms\n", i, times[i].Time() * 1e3);
}
}
}
@ -3160,11 +3166,16 @@ void P_FreeExtraLevelData()
// [RH] position indicates the start spot to spawn at
void P_SetupLevel (char *lumpname, int position)
{
cycle_t times[20] = { 0 };
cycle_t times[20];
FMapThing *buildthings;
int numbuildthings;
int i;
bool buildmap;
for (i = 0; i < (int)countof(times); ++i)
{
times[i].Reset();
}
wminfo.partime = 180;
@ -3278,33 +3289,33 @@ void P_SetupLevel (char *lumpname, int position)
if (!map->isText)
{
clock (times[0]);
times[0].Clock();
P_LoadVertexes (map);
unclock (times[0]);
times[0].Unclock();
// Check for maps without any BSP data at all (e.g. SLIGE)
clock (times[1]);
times[1].Clock();
P_LoadSectors (map);
unclock (times[1]);
times[1].Unclock();
clock (times[2]);
times[2].Clock();
P_LoadSideDefs (map);
unclock (times[2]);
times[2].Unclock();
clock (times[3]);
times[3].Clock();
if (!map->HasBehavior)
P_LoadLineDefs (map);
else
P_LoadLineDefs2 (map); // [RH] Load Hexen-style linedefs
unclock (times[3]);
times[3].Unclock();
clock (times[4]);
times[4].Clock();
P_LoadSideDefs2 (map);
unclock (times[4]);
times[4].Unclock();
clock (times[5]);
times[5].Clock();
P_FinishLoadingLineDefs ();
unclock (times[5]);
times[5].Unclock();
if (!map->HasBehavior)
P_LoadThings (map);
@ -3316,9 +3327,9 @@ void P_SetupLevel (char *lumpname, int position)
P_ParseTextMap(map);
}
clock (times[6]);
times[6].Clock();
P_LoopSidedefs ();
unclock (times[6]);
times[6].Unclock();
linemap.Clear();
linemap.ShrinkToFit();
@ -3378,17 +3389,17 @@ void P_SetupLevel (char *lumpname, int position)
}
else if (!map->isText) // regular nodes are not supported for text maps
{
clock (times[7]);
times[7].Clock();
P_LoadSubsectors (map);
unclock (times[7]);
times[7].Unclock();
clock (times[8]);
times[8].Clock();
if (!ForceNodeBuild) P_LoadNodes (map);
unclock (times[8]);
times[8].Unclock();
clock (times[9]);
times[9].Clock();
if (!ForceNodeBuild) P_LoadSegs (map);
unclock (times[9]);
times[9].Unclock();
}
else ForceNodeBuild = true;
@ -3418,21 +3429,21 @@ void P_SetupLevel (char *lumpname, int position)
DPrintf ("BSP generation took %.3f sec (%d segs)\n", (endTime - startTime) * 0.001, numsegs);
}
clock (times[10]);
times[10].Clock();
P_LoadBlockMap (map);
unclock (times[10]);
times[10].Unclock();
clock (times[11]);
times[11].Clock();
P_LoadReject (map, buildmap);
unclock (times[11]);
times[11].Unclock();
clock (times[12]);
times[12].Clock();
P_GroupLines (buildmap);
unclock (times[12]);
times[12].Unclock();
clock (times[13]);
times[13].Clock();
P_FloodZones ();
unclock (times[13]);
times[13].Unclock();
bodyqueslot = 0;
// phares 8/10/98: Clear body queue so the corpses from previous games are
@ -3445,7 +3456,7 @@ void P_SetupLevel (char *lumpname, int position)
if (!buildmap)
{
clock (times[14]);
times[14].Clock();
P_SpawnThings(position);
for (i = 0; i < MAXPLAYERS; ++i)
@ -3453,12 +3464,12 @@ void P_SetupLevel (char *lumpname, int position)
if (playeringame[i] && players[i].mo != NULL)
players[i].health = players[i].mo->health;
}
unclock (times[14]);
times[14].Unclock();
clock (times[15]);
times[15].Clock();
if (!map->HasBehavior && !map->isText)
P_TranslateTeleportThings (); // [RH] Assign teleport destination TIDs
unclock (times[15]);
times[15].Unclock();
}
else
{
@ -3473,9 +3484,9 @@ void P_SetupLevel (char *lumpname, int position)
// set up world state
P_SpawnSpecials ();
clock (times[16]);
times[16].Clock();
PO_Init (); // Initialize the polyobjs
unclock (times[16]);
times[16].Unclock();
// if deathmatch, randomly spawn the active players
if (deathmatch)
@ -3499,14 +3510,14 @@ void P_SetupLevel (char *lumpname, int position)
// [RH] Remove all particles
R_ClearParticles ();
clock (times[17]);
times[17].Clock();
// preload graphics and sounds
if (precache)
{
R_PrecacheLevel ();
S_PrecacheLevel ();
}
unclock (times[17]);
times[17].Unclock();
if (deathmatch)
{
@ -3542,7 +3553,7 @@ void P_SetupLevel (char *lumpname, int position)
"init polys",
"precache"
};
Printf ("Time%3d:%10llu cycles (%s)\n", i, times[i], timenames[i]);
Printf ("Time%3d:%9.4f ms (%s)\n", i, times[i].Time() * 1e3, timenames[i]);
}
}
MapThingsConverted.Clear();

View file

@ -461,7 +461,7 @@ sightcounts[2]++;
bool P_CheckSight (const AActor *t1, const AActor *t2, int flags)
{
clock (SightCycles);
SightCycles.Clock();
bool res;
@ -538,7 +538,7 @@ sightcounts[0]++;
SeePastShootableLines = 0;
done:
unclock (SightCycles);
SightCycles.Unclock();
return res;
}
@ -546,8 +546,7 @@ ADD_STAT (sight)
{
FString out;
out.Format ("%04.1f ms (%04.1f max), %5d %2d%4d%4d%4d%4d\n",
(double)SightCycles * 1000 * SecondsPerCycle,
(double)MaxSightCycles * 1000 * SecondsPerCycle,
SightCycles.TimeMS(), MaxSightCycles.TimeMS(),
sightcounts[3], sightcounts[0], sightcounts[1], sightcounts[2], sightcounts[4], sightcounts[5]);
return out;
}
@ -556,12 +555,12 @@ void P_ResetSightCounters (bool full)
{
if (full)
{
MaxSightCycles = 0;
MaxSightCycles.Reset();
}
if (SightCycles > MaxSightCycles)
if (SightCycles.Time() > MaxSightCycles.Time())
{
MaxSightCycles = SightCycles;
}
SightCycles = 0;
SightCycles.Reset();
memset (sightcounts, 0, sizeof(sightcounts));
}

View file

@ -55,6 +55,9 @@ void ATeleportFog::PostBeginPlay ()
case GAME_Strife:
SetState(FindState(NAME_Strife));
break;
default:
break;
}
}

View file

@ -1249,7 +1249,7 @@ DWORD (STACK_ARGS *dovline1)() = vlinec1;
DWORD (STACK_ARGS *doprevline1)() = vlinec1;
#ifdef X64_ASM
extern "C" static void vlinetallasm4();
extern "C" void vlinetallasm4();
#define dovline4 vlinetallasm4
extern "C" void setupvlinetallasm (int);
#else

View file

@ -1415,7 +1415,10 @@ void R_SetupBuffer ()
void R_RenderActorView (AActor *actor, bool dontmaplines)
{
WallCycles = PlaneCycles = MaskedCycles = WallScanCycles = 0;
WallCycles.Reset();
PlaneCycles.Reset();
MaskedCycles.Reset();
WallScanCycles.Reset();
R_SetupBuffer ();
R_SetupFrame (actor);
@ -1459,7 +1462,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
// [RH] Setup particles for this frame
R_FindParticleSubsectors ();
clock (WallCycles);
WallCycles.Clock();
DWORD savedflags = camera->renderflags;
// Never draw the player unless in chasecam mode
if (!r_showviewer)
@ -1471,16 +1474,16 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
R_RenderBSPNode (nodes + numnodes - 1); // The head node is the last node output.
}
camera->renderflags = savedflags;
unclock (WallCycles);
WallCycles.Unclock();
NetUpdate ();
if (viewactive)
{
clock (PlaneCycles);
PlaneCycles.Clock();
R_DrawPlanes ();
R_DrawSkyBoxes ();
unclock (PlaneCycles);
PlaneCycles.Unclock();
// [RH] Walk through mirrors
size_t lastmirror = WallMirrors.Size ();
@ -1491,9 +1494,9 @@ void R_RenderActorView (AActor *actor, bool dontmaplines)
NetUpdate ();
clock (MaskedCycles);
MaskedCycles.Clock();
R_DrawMasked ();
unclock (MaskedCycles);
MaskedCycles.Unclock();
NetUpdate ();

View file

@ -48,6 +48,7 @@
#include "i_video.h"
#include "i_sound.h"
#include "i_music.h"
#include "x86.h"
#include "d_main.h"
#include "d_net.h"
@ -63,23 +64,16 @@
EXTERN_CVAR (String, language)
#if defined(X86_ASM) || defined(X64_ASM)
extern "C" void STACK_ARGS CheckMMX (CPUInfo *cpu);
#endif
extern "C"
{
double SecondsPerCycle = 1e-8;
double CyclesPerSecond = 1e8;
CPUInfo CPU;
}
#ifndef NO_GTK
extern bool GtkAvailable;
#endif
void CalculateCPUSpeed ();
DWORD LanguageIDs[4] =
{
MAKE_ID ('e','n','u',0),
@ -182,97 +176,14 @@ void SetLanguageIDs ()
//
void I_Init (void)
{
#if !defined(X86_ASM) && !defined(X64_ASM)
memset (&CPU, 0, sizeof(CPU));
#else
CheckMMX (&CPU);
CalculateCPUSpeed ();
// Why does Intel right-justify this string?
char *f = CPU.CPUString, *t = f;
while (*f == ' ')
{
++f;
}
if (f != t)
{
while (*f != 0)
{
*t++ = *f++;
}
}
#endif
if (CPU.VendorID[0])
{
Printf ("CPU Vendor ID: %s\n", CPU.VendorID);
if (CPU.CPUString[0])
{
Printf (" Name: %s\n", CPU.CPUString);
}
if (CPU.bIsAMD)
{
Printf (" Family %d (%d), Model %d, Stepping %d\n",
CPU.Family, CPU.AMDFamily, CPU.AMDModel, CPU.AMDStepping);
}
else
{
Printf (" Family %d, Model %d, Stepping %d\n",
CPU.Family, CPU.Model, CPU.Stepping);
}
Printf (" Features:");
if (CPU.bMMX) Printf (" MMX");
if (CPU.bMMXPlus) Printf (" MMX+");
if (CPU.bSSE) Printf (" SSE");
if (CPU.bSSE2) Printf (" SSE2");
if (CPU.bSSE3) Printf (" SSE3");
if (CPU.b3DNow) Printf (" 3DNow!");
if (CPU.b3DNowPlus) Printf (" 3DNow!+");
Printf ("\n");
}
CheckCPUID (&CPU);
DumpCPUInfo (&CPU);
I_GetTime = I_GetTimePolled;
I_WaitForTic = I_WaitForTicPolled;
I_InitSound ();
}
void CalculateCPUSpeed ()
{
timeval start, stop, now;
cycle_t ClockCycles;
DWORD usec;
if (CPU.bRDTSC)
{
ClockCycles = 0;
clock (ClockCycles);
gettimeofday (&start, NULL);
// Count cycles for at least 100 milliseconds.
// We don't have the same accuracy we can get with the Win32
// performance counters, so we have to time longer.
stop.tv_usec = start.tv_usec + 100000;
stop.tv_sec = start.tv_sec;
if (stop.tv_usec >= 1000000)
{
stop.tv_usec -= 1000000;
stop.tv_sec += 1;
}
do
{
gettimeofday (&now, NULL);
} while (timercmp (&now, &stop, <));
unclock (ClockCycles);
gettimeofday (&now, NULL);
usec = now.tv_usec - start.tv_usec;
CyclesPerSecond = (double)ClockCycles * 1e6 / (double)usec;
SecondsPerCycle = 1.0 / CyclesPerSecond;
}
Printf (PRINT_HIGH, "CPU Speed: ~%f MHz\n", CyclesPerSecond / 1e6);
}
//
// I_Quit
//

View file

@ -45,80 +45,6 @@ enum
extern DWORD LanguageIDs[4];
extern void SetLanguageIDs ();
struct CPUInfo // 92 bytes
{
char VendorID[16];
char CPUString[48];
BYTE Stepping;
BYTE Model;
BYTE Family;
BYTE Type;
BYTE BrandIndex;
BYTE CLFlush;
BYTE CPUCount;
BYTE APICID;
DWORD bSSE3:1;
DWORD DontCare1:31;
DWORD bFPU:1;
DWORD bVME:1;
DWORD bDE:1;
DWORD bPSE:1;
DWORD bRDTSC:1;
DWORD bMSR:1;
DWORD bPAE:1;
DWORD bMCE:1;
DWORD bCX8:1;
DWORD bAPIC:1;
DWORD bReserved1:1;
DWORD bSEP:1;
DWORD bMTRR:1;
DWORD bPGE:1;
DWORD bMCA:1;
DWORD bCMOV:1;
DWORD bPAT:1;
DWORD bPSE36:1;
DWORD bPSN:1;
DWORD bCFLUSH:1;
DWORD bReserved2:1;
DWORD bDS:1;
DWORD bACPI:1;
DWORD bMMX:1;
DWORD bFXSR:1;
DWORD bSSE:1;
DWORD bSSE2:1;
DWORD bSS:1;
DWORD bHTT:1;
DWORD bTM:1;
DWORD bReserved3:1;
DWORD bPBE:1;
DWORD DontCare2:22;
DWORD bMMXPlus:1; // AMD's MMX extensions
DWORD bMMXAgain:1; // Just a copy of bMMX above
DWORD DontCare3:6;
DWORD b3DNowPlus:1;
DWORD b3DNow:1;
BYTE AMDStepping;
BYTE AMDModel;
BYTE AMDFamily;
BYTE bIsAMD;
BYTE DataL1LineSize;
BYTE DataL1LinesPerTag;
BYTE DataL1Associativity;
BYTE DataL1SizeKB;
};
extern "C" {
extern CPUInfo CPU;
}
// Called by DoomMain.
void I_Init (void);

View file

@ -375,9 +375,9 @@ void SDLFB::Update ()
LockCount = 0;
UpdatePending = false;
BlitCycles = 0;
SDLFlipCycles = 0;
clock (BlitCycles);
BlitCycles.Reset();
SDLFlipCycles.Reset();
BlitCycles.Clock();
if (SDL_LockSurface (Screen) == -1)
return;
@ -405,11 +405,11 @@ void SDLFB::Update ()
SDL_UnlockSurface (Screen);
clock (SDLFlipCycles);
SDLFlipCycles.Clock();
SDL_Flip (Screen);
unclock (SDLFlipCycles);
SDLFlipCycles.Unclock();
unclock (BlitCycles);
BlitCycles.Unclock();
if (NeedGammaUpdate)
{
@ -517,10 +517,7 @@ bool SDLFB::IsFullscreen ()
ADD_STAT (blit)
{
FString out;
out.Format (
"blit=%04.1f ms flip=%04.1f ms",
(double)BlitCycles * SecondsPerCycle * 1000,
(double)SDLFlipCycles * SecondsPerCycle * 1000
);
// Both fields are labelled "ms": cycle_t::Time() reports seconds, so use
// TimeMS() for each timer rather than rescaling by hand.
out.Format ("blit=%04.1f ms flip=%04.1f ms",
	BlitCycles.TimeMS(), SDLFlipCycles.TimeMS());
return out;
}

View file

@ -34,64 +34,76 @@
#ifndef __STATS_H__
#define __STATS_H__
#include "i_system.h"
#include "zstring.h"
extern "C" double SecondsPerCycle;
extern "C" double CyclesPerSecond;
#if _MSC_VER
#define _interlockedbittestandset64 hackfixfor
#define _interlockedbittestandreset64 x64compilation
#define _interlockedbittestandset wtfnmake
#define _interlockedbittestandreset doesittoo
#include <intrin.h>
#undef _interlockedbittestandset64
#undef _interlockedbittestandreset64
#undef _interlockedbittestandset
#undef _interlockedbittestandreset
typedef QWORD cycle_t;
inline cycle_t GetClockCycle ()
{
#if _M_X64
return __rdtsc();
#else
return CPU.bRDTSC ? __rdtsc() : 0;
#ifdef unix
#ifdef NO_CLOCK_GETTIME
// Stand-in timer used when NO_CLOCK_GETTIME is defined. It offers the same
// member functions as the real cycle_t, but holds no state: nothing is
// measured and every reading is zero.
class cycle_t
{
public:
	// There is nothing to copy; assignment only yields the target.
	cycle_t &operator= (const cycle_t &)
	{
		return *this;
	}

	// The control calls are accepted and ignored.
	void Reset()
	{
	}

	void Clock()
	{
	}

	void Unclock()
	{
	}

	// Elapsed time in seconds: always zero.
	double Time()
	{
		return 0;
	}

	// Elapsed time in milliseconds: always zero.
	double TimeMS()
	{
		return 0;
	}
};
#else
#include <time.h>
// Accumulating timer built on clock_gettime(CLOCK_MONOTONIC).
//
// Clock() subtracts the current time from the running total and Unclock()
// adds the current time back, so every Clock()/Unclock() pair increases the
// total by the interval between the two calls. There is no constructor: call
// Reset() before the first Clock() on an object that is not zero-initialized.
class cycle_t
{
public:
	// Copy the accumulated total from another timer.
	cycle_t &operator= (const cycle_t &other)
	{
		Sec = other.Sec;
		return *this;
	}

	// Discard everything accumulated so far.
	void Reset()
	{
		Sec = 0;
	}

	// Begin a timed interval.
	void Clock()
	{
		timespec now;
		clock_gettime(CLOCK_MONOTONIC, &now);
		Sec -= now.tv_sec + now.tv_nsec * 1e-9;
	}

	// End the interval begun by the matching Clock().
	void Unclock()
	{
		timespec now;
		clock_gettime(CLOCK_MONOTONIC, &now);
		Sec += now.tv_sec + now.tv_nsec * 1e-9;
	}

	// Accumulated time in seconds.
	double Time()
	{
		return Sec;
	}

	// Accumulated time in milliseconds.
	double TimeMS()
	{
		return Sec * 1e3;
	}

private:
	double Sec;		// running total, in seconds
};
#endif
#else
// Windows
#endif
}
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
typedef unsigned long long cycle_t;
inline cycle_t GetClockCycle()
{
if (CPU.bRDTSC)
{
cycle_t res;
asm volatile ("rdtsc" : "=A" (res));
return res;
}
else
{
return 0;
}
}
#else
typedef QWORD cycle_t;
inline cycle_t GetClockCycle ()
{
return 0;
}
#endif
#define clock(v) {v -= GetClockCycle();}
#define unclock(v) {v += GetClockCycle() /*- 41*/;}
class FStat
{

View file

@ -27,7 +27,8 @@
#include "m_alloc.h"
#include "i_system.h"
#include "i_system.h"
#include "x86.h"
#include "i_video.h"
#include "r_local.h"
#include "r_draw.h"

View file

@ -52,6 +52,7 @@
#include "i_sound.h"
#include "i_music.h"
#include "resource.h"
#include "x86.h"
#include "d_main.h"
#include "d_net.h"
@ -73,7 +74,6 @@ extern "C"
{
double SecondsPerCycle = 1e-8;
double CyclesPerSecond = 1e8; // 100 MHz
CPUInfo CPU;
}
extern HWND Window, ConWindow, GameTitleWindow;
@ -346,50 +346,7 @@ void SetLanguageIDs ()
void I_Init (void)
{
CheckCPUID(&CPU);
CalculateCPUSpeed ();
// Why does Intel right-justify this string?
char *f = CPU.CPUString, *t = f;
while (*f == ' ')
{
++f;
}
if (f != t)
{
while (*f != 0)
{
*t++ = *f++;
}
}
if (CPU.VendorID[0])
{
Printf ("CPU Vendor ID: %s\n", CPU.VendorID);
if (CPU.CPUString[0])
{
Printf (" Name: %s\n", CPU.CPUString);
}
if (CPU.bIsAMD)
{
Printf (" Family %d (%d), Model %d, Stepping %d\n",
CPU.Family, CPU.AMDFamily, CPU.AMDModel, CPU.AMDStepping);
}
else
{
Printf (" Family %d, Model %d, Stepping %d\n",
CPU.Family, CPU.Model, CPU.Stepping);
}
Printf (" Features:");
if (CPU.bMMX) Printf (" MMX");
if (CPU.bMMXPlus) Printf (" MMX+");
if (CPU.bSSE) Printf (" SSE");
if (CPU.bSSE2) Printf (" SSE2");
if (CPU.bSSE3) Printf (" SSE3");
if (CPU.b3DNow) Printf (" 3DNow!");
if (CPU.b3DNowPlus) Printf (" 3DNow!+");
Printf ("\n");
}
DumpCPUInfo(&CPU);
// Use a timer event if possible
NewTicArrived = CreateEvent (NULL, FALSE, FALSE, NULL);
@ -433,54 +390,6 @@ void I_Init (void)
I_InitSound ();
}
void CalculateCPUSpeed ()
{
LARGE_INTEGER freq;
QueryPerformanceFrequency (&freq);
if (freq.QuadPart != 0 && CPU.bRDTSC)
{
LARGE_INTEGER count1, count2;
DWORD minDiff;
cycle_t ClockCalibration = 0;
// Count cycles for at least 55 milliseconds.
// The performance counter is very low resolution compared to CPU
// speeds today, so the longer we count, the more accurate our estimate.
// On the other hand, we don't want to count too long, because we don't
// want the user to notice us spend time here, since most users will
// probably never use the performance statistics.
minDiff = freq.LowPart * 11 / 200;
// Minimize the chance of task switching during the testing by going very
// high priority. This is another reason to avoid timing for too long.
SetPriorityClass (GetCurrentProcess (), REALTIME_PRIORITY_CLASS);
SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_TIME_CRITICAL);
clock (ClockCalibration);
QueryPerformanceCounter (&count1);
do
{
QueryPerformanceCounter (&count2);
} while ((DWORD)((unsigned __int64)count2.QuadPart - (unsigned __int64)count1.QuadPart) < minDiff);
unclock (ClockCalibration);
QueryPerformanceCounter (&count2);
SetPriorityClass (GetCurrentProcess (), NORMAL_PRIORITY_CLASS);
SetThreadPriority (GetCurrentThread (), THREAD_PRIORITY_NORMAL);
CyclesPerSecond = (double)ClockCalibration *
(double)freq.QuadPart /
(double)((__int64)count2.QuadPart - (__int64)count1.QuadPart);
SecondsPerCycle = 1.0 / CyclesPerSecond;
}
else
{
Printf ("Can't determine CPU speed, so pretending.\n");
}
Printf ("CPU Speed: %.0f MHz\n", CyclesPerSecond / 1e6);
}
//
// I_Quit
//

View file

@ -49,81 +49,7 @@ typedef enum {
} os_t;
extern os_t OSPlatform;
struct CPUInfo // 92 bytes
{
char VendorID[16]; // 0
char CPUString[48]; // 16
BYTE Stepping; // 64
BYTE Model; // 65
BYTE Family; // 66
BYTE Type; // 67
BYTE BrandIndex; // 68
BYTE CLFlush; // 69
BYTE CPUCount; // 70
BYTE APICID; // 71
DWORD bSSE3:1; // 72
DWORD DontCare1:31;
DWORD bFPU:1; // 76
DWORD bVME:1;
DWORD bDE:1;
DWORD bPSE:1;
DWORD bRDTSC:1;
DWORD bMSR:1;
DWORD bPAE:1;
DWORD bMCE:1;
DWORD bCX8:1; // 77
DWORD bAPIC:1;
DWORD bReserved1:1;
DWORD bSEP:1;
DWORD bMTRR:1;
DWORD bPGE:1;
DWORD bMCA:1;
DWORD bCMOV:1;
DWORD bPAT:1; // 78
DWORD bPSE36:1;
DWORD bPSN:1;
DWORD bCFLUSH:1;
DWORD bReserved2:1;
DWORD bDS:1;
DWORD bACPI:1;
DWORD bMMX:1;
DWORD bFXSR:1; // 79
DWORD bSSE:1;
DWORD bSSE2:1;
DWORD bSS:1;
DWORD bHTT:1;
DWORD bTM:1;
DWORD bReserved3:1;
DWORD bPBE:1;
DWORD DontCare2:22; // 80
DWORD bMMXPlus:1; // AMD's MMX extensions
DWORD bMMXAgain:1; // Just a copy of bMMX above
DWORD DontCare3:6;
DWORD b3DNowPlus:1;
DWORD b3DNow:1;
BYTE AMDStepping; // 84
BYTE AMDModel; // 85
BYTE AMDFamily; // 86
BYTE bIsAMD; // 87
BYTE DataL1LineSize; // 88
BYTE DataL1LinesPerTag; // 89
BYTE DataL1Associativity;//90
BYTE DataL1SizeKB; // 91
};
extern "C" {
extern CPUInfo CPU;
}
// Called by DoomMain.
void I_Init (void);

View file

@ -3,9 +3,15 @@
#endif
#include <mmintrin.h>
#include <emmintrin.h>
#include "doomtype.h"
#include "i_system.h"
#include "doomtype.h"
#include "doomdef.h"
#include "x86.h"
extern "C"
{
CPUInfo CPU;
}
#ifdef __GNUC__
#define __cpuid(output, func) __asm__ __volatile__("cpuid" : "=a" ((output)[0]),\
@ -95,8 +101,11 @@ haveid:
cpu->Family = (foo[0] & 0xF00) >> 8;
if (cpu->Family == 15)
{ // Add extended model and family.
cpu->Family += (foo[0] >> 20) & 0xFF;
{ // Add extended family.
cpu->Family += (foo[0] >> 20) & 0xFF;
}
if (cpu->Family == 6 || cpu->Family == 15)
{ // Add extended model ID.
cpu->Model |= (foo[0] >> 12) & 0xF0;
}
@ -135,6 +144,65 @@ haveid:
#endif
}
// Prints the CPU vendor, name, family/model/stepping and feature flags held
// in *cpu. Nothing is printed when the vendor ID string is empty. The name is
// tidied into a local buffer first; *cpu itself is not modified.
void DumpCPUInfo(const CPUInfo *cpu)
{
	char name[4*4*3+1];
	const char *src = cpu->CPUString;
	char *dst = name;

	// Why does Intel right-justify this string (on P4s)
	// or add extra spaces (on Cores)?
	// Either way, drop the leading spaces...
	while (*src == ' ')
		++src;

	// ...and copy the rest, keeping only the first space of any run of
	// consecutive spaces.
	while (*src != '\0')
	{
		if (*src != ' ' || *(src - 1) != ' ')
		{
			*dst++ = *src;
		}
		++src;
	}
	*dst = '\0';

	// Without a vendor ID there is nothing to report.
	if (!cpu->VendorID[0])
	{
		return;
	}

	Printf("CPU Vendor ID: %s\n", cpu->VendorID);
	if (name[0])
	{
		Printf(" Name: %s\n", name);
	}

	if (!cpu->bIsAMD)
	{
		Printf(" Family %d, Model %d, Stepping %d\n",
			cpu->Family, cpu->Model, cpu->Stepping);
	}
	else
	{
		// AMD parts report their AMD-specific family/model/stepping fields.
		Printf(" Family %d (%d), Model %d, Stepping %d\n",
			cpu->Family, cpu->AMDFamily, cpu->AMDModel, cpu->AMDStepping);
	}

	// One line listing every recognized instruction-set extension.
	Printf(" Features:");
	if (cpu->bMMX)
		Printf(" MMX");
	if (cpu->bMMXPlus)
		Printf(" MMX+");
	if (cpu->bSSE)
		Printf(" SSE");
	if (cpu->bSSE2)
		Printf(" SSE2");
	if (cpu->bSSE3)
		Printf(" SSE3");
	if (cpu->bSSSE3)
		Printf(" SSSE3");
	if (cpu->bSSE41)
		Printf(" SSE4.1");
	if (cpu->bSSE42)
		Printf(" SSE4.2");
	if (cpu->b3DNow)
		Printf(" 3DNow!");
	if (cpu->b3DNowPlus)
		Printf(" 3DNow!+");
	Printf ("\n");
}
#if 0
// Compiler output for this function is crap compared to the assembly

86
src/x86.h Normal file
View file

@ -0,0 +1,86 @@
#ifndef X86_H
#define X86_H
// Processor identification and feature flags, passed to CheckCPUID() and
// printed by DumpCPUInfo().
// NOTE(review): the field order and bit-field widths look like they mirror raw
// CPUID register contents -- confirm against CheckCPUID before rearranging,
// resizing or inserting members.
struct CPUInfo // 92 bytes
{
// Identification strings.
char VendorID[16];
char CPUString[48];
// Eight single-byte identification fields.
BYTE Stepping;
BYTE Model;
BYTE Family;
BYTE Type;
BYTE BrandIndex;
BYTE CLFlush;
BYTE CPUCount;
BYTE APICID;
// First group of flags: the widths below add up to 32 bits. The DontCare*
// members are padding between the flags that are actually read.
DWORD bSSE3:1;
DWORD DontCare1:8;
DWORD bSSSE3:1;
DWORD DontCare1a:9;
DWORD bSSE41:1;
DWORD bSSE42:1;
DWORD DontCare2a:11;
// Second group of flags: 32 one-bit fields.
DWORD bFPU:1;
DWORD bVME:1;
DWORD bDE:1;
DWORD bPSE:1;
DWORD bRDTSC:1;
DWORD bMSR:1;
DWORD bPAE:1;
DWORD bMCE:1;
DWORD bCX8:1;
DWORD bAPIC:1;
DWORD bReserved1:1;
DWORD bSEP:1;
DWORD bMTRR:1;
DWORD bPGE:1;
DWORD bMCA:1;
DWORD bCMOV:1;
DWORD bPAT:1;
DWORD bPSE36:1;
DWORD bPSN:1;
DWORD bCFLUSH:1;
DWORD bReserved2:1;
DWORD bDS:1;
DWORD bACPI:1;
DWORD bMMX:1;
DWORD bFXSR:1;
DWORD bSSE:1;
DWORD bSSE2:1;
DWORD bSS:1;
DWORD bHTT:1;
DWORD bTM:1;
DWORD bReserved3:1;
DWORD bPBE:1;
// Third group of flags: again 32 bits in total.
DWORD DontCare2:22;
DWORD bMMXPlus:1; // AMD's MMX extensions
DWORD bMMXAgain:1; // Just a copy of bMMX above
DWORD DontCare3:6;
DWORD b3DNowPlus:1;
DWORD b3DNow:1;
// AMD-specific identification; DumpCPUInfo prints these when bIsAMD is set.
BYTE AMDStepping;
BYTE AMDModel;
BYTE AMDFamily;
BYTE bIsAMD;
// L1 data cache description.
BYTE DataL1LineSize;
BYTE DataL1LinesPerTag;
BYTE DataL1Associativity;
BYTE DataL1SizeKB;
};
extern "C" CPUInfo CPU;
void CheckCPUID (CPUInfo *cpu);
void DumpCPUInfo (const CPUInfo *cpu);
void DoBlending_SSE2(const PalEntry *from, PalEntry *to, int count, int r, int g, int b, int a);
#endif