/*
===========================================================================
Doom 3 BFG Edition GPL Source Code
Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.
Copyright (C) 2014 Robert Beckebans
This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").
Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Doom 3 BFG Edition Source Code. If not, see <http://www.gnu.org/licenses/>.
In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code. If not, please request a copy in writing from id Software at the address below.
If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
===========================================================================
*/
#pragma hdrstop
#include "precompiled.h"
#include "tr_local.h"
/*
==========================================================================================
FRAME MEMORY ALLOCATION
==========================================================================================
*/
// number of frame data buffers that R_ToggleSmpFrame() cycles through
static const unsigned int NUM_FRAME_DATA = 2;
// every frame allocation is rounded up to a multiple of this many bytes
static const unsigned int FRAME_ALLOC_ALIGNMENT = 128;
// size in bytes of each frame memory buffer allocated by R_InitFrameData()
static const unsigned int MAX_FRAME_MEMORY = 64 * 1024 * 1024; // larger so that we can noclip on PC for dev purposes
// one idFrameData per buffer
idFrameData smpFrameData[NUM_FRAME_DATA];
// the buffer allocations are currently served from, NULL after R_ShutdownFrameData()
idFrameData* frameData;
// incremented by every R_ToggleSmpFrame(), selects the current buffer modulo NUM_FRAME_DATA
unsigned int smpFrame;
// uncomment to gather per allocation type statistics
//#define TRACK_FRAME_ALLOCS
#if defined( TRACK_FRAME_ALLOCS )
// bytes requested per allocation type, reset by R_ToggleSmpFrame()
idSysInterlockedInteger frameAllocTypeCount[FRAME_ALLOC_MAX];
// copy of frameAllocTypeCount taken whenever a new allocation high water mark is recorded
int frameHighWaterTypeCount[FRAME_ALLOC_MAX];
#endif
/*
====================
R_ToggleSmpFrame
====================
*/
void R_ToggleSmpFrame()
{
// update the highwater mark
if( frameData->frameMemoryAllocated.GetValue() > frameData->highWaterAllocated )
{
frameData->highWaterAllocated = frameData->frameMemoryAllocated.GetValue();
#if defined( TRACK_FRAME_ALLOCS )
frameData->highWaterUsed = frameData->frameMemoryUsed.GetValue();
for( int i = 0; i < FRAME_ALLOC_MAX; i++ )
{
frameHighWaterTypeCount[i] = frameAllocTypeCount[i].GetValue();
}
#endif
}
// switch to the next frame
smpFrame++;
frameData = &smpFrameData[smpFrame % NUM_FRAME_DATA];
// reset the memory allocation
// RB: 64 bit fixes, changed unsigned int to uintptr_t
const uintptr_t bytesNeededForAlignment = FRAME_ALLOC_ALIGNMENT - ( ( uintptr_t )frameData->frameMemory & ( FRAME_ALLOC_ALIGNMENT - 1 ) );
// RB end
frameData->frameMemoryAllocated.SetValue( bytesNeededForAlignment );
frameData->frameMemoryUsed.SetValue( 0 );
#if defined( TRACK_FRAME_ALLOCS )
for( int i = 0; i < FRAME_ALLOC_MAX; i++ )
{
frameAllocTypeCount[i].SetValue( 0 );
}
#endif
// clear the command chain and make a RC_NOP command the only thing on the list
frameData->cmdHead = frameData->cmdTail = ( emptyCommand_t* )R_FrameAlloc( sizeof( *frameData->cmdHead ), FRAME_ALLOC_DRAW_COMMAND );
frameData->cmdHead->commandId = RC_NOP;
frameData->cmdHead->next = NULL;
}
/*
=====================
R_ShutdownFrameData
=====================
*/
void R_ShutdownFrameData()
{
frameData = NULL;
for( int i = 0; i < NUM_FRAME_DATA; i++ )
{
Mem_Free16( smpFrameData[i].frameMemory );
smpFrameData[i].frameMemory = NULL;
}
}
/*
=====================
R_InitFrameData
=====================
*/
void R_InitFrameData()
{
R_ShutdownFrameData();
for( int i = 0; i < NUM_FRAME_DATA; i++ )
{
smpFrameData[i].frameMemory = ( byte* ) Mem_Alloc16( MAX_FRAME_MEMORY, TAG_RENDER );
}
// must be set before calling R_ToggleSmpFrame()
frameData = &smpFrameData[ 0 ];
R_ToggleSmpFrame();
}
/*
================
R_FrameAlloc
This data will be automatically freed when the
current frame's back end completes.
This should only be called by the front end. The
back end shouldn't need to allocate memory.
All temporary data, like dynamic tesselations
and local spaces are allocated here.
All memory is cache-line-cleared for the best performance.
================
*/
void* R_FrameAlloc( int bytes, frameAllocType_t type )
{
#if defined( TRACK_FRAME_ALLOCS )
frameData->frameMemoryUsed.Add( bytes );
frameAllocTypeCount[type].Add( bytes );
#endif
bytes = ( bytes + FRAME_ALLOC_ALIGNMENT - 1 ) & ~( FRAME_ALLOC_ALIGNMENT - 1 );
// thread safe add
int end = frameData->frameMemoryAllocated.Add( bytes );
if( end > MAX_FRAME_MEMORY )
{
idLib::Error( "R_FrameAlloc ran out of memory. bytes = %d, end = %d, highWaterAllocated = %d\n", bytes, end, frameData->highWaterAllocated );
}
byte* ptr = frameData->frameMemory + end - bytes;
// cache line clear the memory
for( int offset = 0; offset < bytes; offset += CACHE_LINE_SIZE )
{
ZeroCacheLine( ptr, offset );
}
return ptr;
}
/*
==================
R_ClearedFrameAlloc

Same as R_FrameAlloc(); kept as a separate entry point for callers that want
to state that they rely on zeroed memory.
==================
*/
void* R_ClearedFrameAlloc( int bytes, frameAllocType_t type )
{
	// NOTE: R_FrameAlloc() cache line clears every allocation, so nothing is left to do here
	void* const cleared = R_FrameAlloc( bytes, type );
	return cleared;
}
/*
==========================================================================================
FRONT-END STATIC MEMORY ALLOCATION
==========================================================================================
*/
/*
=================
R_StaticAlloc

Allocates bytes with Mem_Alloc() under the given tag and counts the allocation
in tr.pc.c_alloc. A failed allocation is a fatal error unless the request was
zero bytes long, in which case NULL is returned.
=================
*/
void* R_StaticAlloc( int bytes, const memTag_t tag )
{
	tr.pc.c_alloc += 1;

	void* const memory = Mem_Alloc( bytes, tag );
	if( memory != NULL )
	{
		return memory;
	}

	// don't exit on failure on zero length allocations since the old code didn't
	if( bytes != 0 )
	{
		common->FatalError( "R_StaticAlloc failed on %i bytes", bytes );
	}
	return memory;
}
/*
=================
R_ClearedStaticAlloc

Allocates bytes through R_StaticAlloc() and fills the block with zeros.
Returns whatever R_StaticAlloc() returned, which may be NULL for a zero
length request.
=================
*/
void* R_ClearedStaticAlloc( int bytes )
{
	void* buf = R_StaticAlloc( bytes );

	// R_StaticAlloc() lets a zero length request come back as NULL, and passing
	// a NULL pointer to memset() is undefined even when the size is 0
	if( buf != NULL )
	{
		memset( buf, 0, bytes );
	}
	return buf;
}
/*
=================
R_StaticFree

Counts the release in tr.pc.c_free and passes data on to Mem_Free().
=================
*/
void R_StaticFree( void* data )
{
	// the counter is bumped for every call, whatever data points at
	tr.pc.c_free += 1;
	Mem_Free( data );
}
/*
==========================================================================================
FRONT-END RENDERING
==========================================================================================
*/
/*
=================
R_SortDrawSurfs

Reorders the drawSurfs array in place.

Every surface is packed into a unique 64 bit key. The keys are sorted in
descending order by a non-recursive quicksort that finishes short ranges with
a selection sort, and the surfaces are then rearranged to match.
=================
*/
static void R_SortDrawSurfs( drawSurf_t** drawSurfs, const int numDrawSurfs )
{
#if 1
	// nothing to reorder
	if( numDrawSurfs <= 0 )
	{
		return;
	}

	uint64* indices = ( uint64* ) _alloca16( numDrawSurfs * sizeof( indices[0] ) );

	// Key layout, most significant part first. Larger keys end up earlier in the list.
	// bits 32-63: bit pattern of ( SS_POST_PROCESS - sort ), so lower sort values come first
	// bits 16-31: minimum depth bound of the surface scaled to 16 bits, 0 without frontEndGeo
	// bits  0-15: numDrawSurfs - i, so otherwise equal surfaces keep the order they came in
	assert( numDrawSurfs <= 0xFFFF );
	for( int i = 0; i < numDrawSurfs; i++ )
	{
		float sort = SS_POST_PROCESS - drawSurfs[i]->sort;
		// a non-negative float orders the same way as its bit pattern read as an integer
		assert( sort >= 0.0f );

		// copy the bits out rather than reading the float through a uint32 pointer,
		// which breaks the strict aliasing rules
		uint32 sortBits;
		memcpy( &sortBits, &sort, sizeof( sortBits ) );

		uint64 dist = 0;
		if( drawSurfs[i]->frontEndGeo != NULL )
		{
			float min = 0.0f;
			float max = 1.0f;
			idRenderMatrix::DepthBoundsForBounds( min, max, drawSurfs[i]->space->mvp, drawSurfs[i]->frontEndGeo->bounds );
			dist = idMath::Ftoui16( min * 0xFFFF );
		}

		indices[i] = ( ( numDrawSurfs - i ) & 0xFFFF ) | ( dist << 16 ) | ( ( uint64 )sortBits << 32 );
	}

	const int64 MAX_LEVELS = 128;
	int64 lo[MAX_LEVELS];
	int64 hi[MAX_LEVELS];

	// lo[] / hi[] form an explicit stack of ranges that still have to be sorted,
	// the range being worked on is kept in st_lo / st_hi.
	// The 'register' storage class these locals used to carry is not allowed in C++17.
	int64 st_lo = 0;
	int64 st_hi = numDrawSurfs - 1;
	int64 level = 0;

	for( ; ; )
	{
		int64 i = st_lo;
		int64 j = st_hi;
		if( j - i >= 4 && level < MAX_LEVELS - 1 )
		{
			// partition around the middle key: larger keys to the left, smaller keys to the right
			uint64 pivot = indices[( i + j ) / 2];
			do
			{
				while( indices[i] > pivot ) i++;
				while( indices[j] < pivot ) j--;
				if( i > j ) break;
				uint64 h = indices[i];
				indices[i] = indices[j];
				indices[j] = h;
			}
			while( ++i <= --j );

			// No need for these iterations because we are always sorting unique values.
			//while ( indices[j] == pivot && st_lo < j ) j--;
			//while ( indices[i] == pivot && i < st_hi ) i++;

			assert( level < MAX_LEVELS - 1 );
			// remember the right part for later and carry on with the left part
			lo[level] = i;
			hi[level] = st_hi;
			st_hi = j;
			level++;
		}
		else
		{
			// short range, or no room left on the stack: selection sort that moves
			// the smallest remaining key to the end of the range
			for( ; i < j; j-- )
			{
				int64 m = i;
				for( int64 k = i + 1; k <= j; k++ )
				{
					if( indices[k] < indices[m] )
					{
						m = k;
					}
				}
				uint64 h = indices[m];
				indices[m] = indices[j];
				indices[j] = h;
			}
			if( --level < 0 )
			{
				break;
			}
			st_lo = lo[level];
			st_hi = hi[level];
		}
	}

	// Turn the sorted keys back into surface pointers: the low 16 bits of a key hold
	// numDrawSurfs minus the original position. The pointers are written over the key
	// storage, which relies on a pointer not being wider than a key.
	drawSurf_t** newDrawSurfs = ( drawSurf_t** ) indices;
	for( int i = 0; i < numDrawSurfs; i++ )
	{
		newDrawSurfs[i] = drawSurfs[numDrawSurfs - ( indices[i] & 0xFFFF )];
	}
	memcpy( drawSurfs, newDrawSurfs, numDrawSurfs * sizeof( drawSurfs[0] ) );

#else

	// qsort() based variant that only looks at the sort value, currently compiled out
	struct local_t
	{
		static int R_QsortSurfaces( const void* a, const void* b )
		{
			const drawSurf_t* ea = *( drawSurf_t** )a;
			const drawSurf_t* eb = *( drawSurf_t** )b;
			if( ea->sort < eb->sort )
			{
				return -1;
			}
			if( ea->sort > eb->sort )
			{
				return 1;
			}
			return 0;
		}
	};

	// Add a sort offset so surfaces with equal sort orders still deterministically
	// draw in the order they were added, at least within a given model.
	float sorfOffset = 0.0f;
	for( int i = 0; i < numDrawSurfs; i++ )
	{
		drawSurfs[i]->sort += sorfOffset;
		sorfOffset += 0.000001f;
	}

	// sort the drawsurfs
	qsort( drawSurfs, numDrawSurfs, sizeof( drawSurfs[0] ), local_t::R_QsortSurfaces );
#endif
}
// RB begin
/*
================
R_SetupSplitFrustums

Fills in frustumSplitDistances, frustumMVPs and frustums of viewDef for the
frustum slots 1 .. r_shadowMapSplits + 1 (limited by MAX_FRUSTUMS). The far
distance of every slot blends a geometric and a linear subdivision of the
range [zNearStart, 10000], weighted by r_shadowMapSplitWeight.

Expects viewDef->worldSpace.modelViewMatrix to be set up already.
================
*/
static void R_SetupSplitFrustums( viewDef_t* viewDef )
{
	const float zNearStart = ( viewDef->renderView.cramZNear ) ? ( r_znear.GetFloat() * 0.25f ) : r_znear.GetFloat();
	const float zFarEnd = 10000;

	float zNear = zNearStart;
	float zFar = zFarEnd;

	const float lambda = r_shadowMapSplitWeight.GetFloat();
	const float ratio = zFarEnd / zNearStart;
	const int numSplits = r_shadowMapSplits.GetInteger();

	// split distances that are not assigned below stay at infinity
	for( int i = 0; i < 6; i++ )
	{
		viewDef->frustumSplitDistances[i] = idMath::INFINITY;
	}

	for( int i = 1; i <= ( numSplits + 1 ) && i < MAX_FRUSTUMS; i++ )
	{
		// fraction of the way through the subdivided range, reaches 1 at the last slot
		const float si = i / ( float )( numSplits + 1 );

		// slots after the first cascade start slightly in front of the previous far distance
		if( i > FRUSTUM_CASCADE1 )
		{
			zNear = zFar - ( zFar * 0.005f );
		}

		// geometric term weighted by lambda (and scaled by 1.005), linear term weighted by 1 - lambda
		zFar = 1.005f * lambda * ( zNearStart * powf( ratio, si ) ) + ( 1 - lambda ) * ( zNearStart + ( zFarEnd - zNearStart ) * si );

		// the slot past the last split gets no split distance
		if( i <= numSplits )
		{
			viewDef->frustumSplitDistances[i - 1] = zFar;
		}

		float projectionMatrix[16];
		R_SetupProjectionMatrix2( viewDef, zNear, zFar, projectionMatrix );

		// setup render matrices for faster culling
		idRenderMatrix projectionRenderMatrix;
		idRenderMatrix::Transpose( *( idRenderMatrix* )projectionMatrix, projectionRenderMatrix );
		idRenderMatrix viewRenderMatrix;
		idRenderMatrix::Transpose( *( idRenderMatrix* )viewDef->worldSpace.modelViewMatrix, viewRenderMatrix );
		idRenderMatrix::Multiply( projectionRenderMatrix, viewRenderMatrix, viewDef->frustumMVPs[i] );

		// the planes of the view frustum are needed for portal visibility culling
		idRenderMatrix::GetFrustumPlanes( viewDef->frustums[i], viewDef->frustumMVPs[i], false, true );

		// the DOOM 3 frustum planes point outside the frustum
		for( int j = 0; j < 6; j++ )
		{
			viewDef->frustums[i][j] = - viewDef->frustums[i][j];
		}

		// remove the Z-near to avoid portals from being near clipped
		if( i == FRUSTUM_CASCADE1 )
		{
			viewDef->frustums[i][4][3] -= r_znear.GetFloat();
		}
	}
}
// RB end
/*
================
R_RenderView

A view may be either the actual camera view,
a mirror / remote location, or a 3D view on a gui surface.

Parms will typically be allocated with R_FrameAlloc

tr.viewDef points at parms while the view is being processed and is put back
to its previous value on every way out of the function.
================
*/
void R_RenderView( viewDef_t* parms )
{
	// save view in case we are a subview
	viewDef_t* oldView = tr.viewDef;

	tr.viewDef = parms;

	// setup the matrix for world space to eye space
	R_SetupViewMatrix( tr.viewDef );

	// we need to set the projection matrix before doing
	// portal-to-screen scissor calculations
	R_SetupProjectionMatrix( tr.viewDef );

	// setup render matrices for faster culling
	idRenderMatrix::Transpose( *( idRenderMatrix* )tr.viewDef->projectionMatrix, tr.viewDef->projectionRenderMatrix );
	idRenderMatrix viewRenderMatrix;
	idRenderMatrix::Transpose( *( idRenderMatrix* )tr.viewDef->worldSpace.modelViewMatrix, viewRenderMatrix );
	idRenderMatrix::Multiply( tr.viewDef->projectionRenderMatrix, viewRenderMatrix, tr.viewDef->worldSpace.mvp );

	// the planes of the view frustum are needed for portal visibility culling
	idRenderMatrix::GetFrustumPlanes( tr.viewDef->frustums[FRUSTUM_PRIMARY], tr.viewDef->worldSpace.mvp, false, true );

	// the DOOM 3 frustum planes point outside the frustum
	for( int i = 0; i < 6; i++ )
	{
		tr.viewDef->frustums[FRUSTUM_PRIMARY][i] = - tr.viewDef->frustums[FRUSTUM_PRIMARY][i];
	}

	// remove the Z-near to avoid portals from being near clipped
	tr.viewDef->frustums[FRUSTUM_PRIMARY][4][3] -= r_znear.GetFloat();

	// RB begin
	R_SetupSplitFrustums( tr.viewDef );
	// RB end

	// identify all the visible portal areas, and create view lights and view entities
	// for all the entityDefs and lightDefs that are in the visible portal areas
	static_cast<idRenderWorldLocal*>( parms->renderWorld )->FindViewLightsAndEntities();

	// wait for any shadow volume jobs from the previous frame to finish
	tr.frontEndJobList->Wait();

	// make sure that interactions exist for all light / entity combinations that are visible
	// add any pre-generated light shadows, and calculate the light shader values
	R_AddLights();

	// adds ambient surfaces and create any necessary interaction surfaces to add to the light lists
	R_AddModels();

	// build up the GUIs on world surfaces
	R_AddInGameGuis( tr.viewDef->drawSurfs, tr.viewDef->numDrawSurfs );

	// any viewLight that didn't have visible surfaces can have its shadows removed
	R_OptimizeViewLightsList();

	// sort all the ambient surfaces for translucency ordering
	R_SortDrawSurfs( tr.viewDef->drawSurfs, tr.viewDef->numDrawSurfs );

	// generate any subviews (mirrors, cameras, etc) before adding this view
	if( R_GenerateSubViews( tr.viewDef->drawSurfs, tr.viewDef->numDrawSurfs ) )
	{
		// if we are debugging subviews, allow the skipping of the main view draw
		if( r_subviewOnly.GetBool() )
		{
			// hand the previous view back just like the regular exit does, otherwise
			// the caller would carry on with tr.viewDef still pointing at this view
			tr.viewDef = oldView;
			return;
		}
	}

	// write everything needed to the demo file
	if( common->WriteDemo() )
	{
		static_cast<idRenderWorldLocal*>( parms->renderWorld )->WriteVisibleDefs( tr.viewDef );
	}

	// add the rendering commands for this viewDef
	R_AddDrawViewCmd( parms, false );

	// restore view in case we are a subview
	tr.viewDef = oldView;
}
/*
================
R_RenderPostProcess

Because R_RenderView may be called by subviews we have to make sure the post process
pass happens after the active view and its subviews are done rendering.

tr.viewDef holds the same value after the call as before it.
================
*/
void R_RenderPostProcess( viewDef_t* parms )
{
	// put tr.viewDef back afterwards so the call leaves the current view untouched
	viewDef_t* const savedView = tr.viewDef;

	R_AddDrawPostProcess( parms );

	tr.viewDef = savedView;
}