/*
===========================================================================
Copyright (C) 2019-2020 Gian 'myT' Schellenbaum

This file is part of Challenge Quake 3 (CNQ3).

Challenge Quake 3 is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the License,
or (at your option) any later version.

Challenge Quake 3 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Challenge Quake 3. If not, see <https://www.gnu.org/licenses/>.
===========================================================================
*/
// Direct3D 11 rendering back-end

#if defined(_WIN32)


#include "tr_local.h"
#include <Windows.h>
#include <d3d11.h>
#include <dxgi.h>
#include <dxgi1_2.h>
#include <dxgi1_3.h>

#pragma region Windows 10 SDK

	#if !defined(__dxgicommon_h__)
		enum DXGI_COLOR_SPACE_TYPE;
	#endif
	#include "dxgi/dxgi1_4.h"
	#include "dxgi/dxgi1_5.h"

	#if !defined(DXGI_PRESENT_ALLOW_TEARING)
		#define DXGI_PRESENT_ALLOW_TEARING 0x00000200UL
	#endif

	#if !defined(DXGI_SWAP_EFFECT_FLIP_DISCARD)
		#define DXGI_SWAP_EFFECT_FLIP_DISCARD ((DXGI_SWAP_EFFECT)4)
	#endif

	#if !defined(DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)
		#define DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING ((DXGI_SWAP_CHAIN_FLAG)2048)
	#endif

#pragma endregion

#include "hlsl/generic_vs.h"
#include "hlsl/generic_ps.h"
#include "hlsl/generic_a_ps.h"
#include "hlsl/generic_d_ps.h"
#include "hlsl/generic_ad_ps.h"
#include "hlsl/post_vs.h"
#include "hlsl/post_ps.h"
#include "hlsl/clear_vs.h"
#include "hlsl/clear_ps.h"
#include "hlsl/dl_vs.h"
#include "hlsl/dl_ps.h"
#include "hlsl/sprite_vs.h"
#include "hlsl/sprite_ps.h"
#include "hlsl/mip_start_cs.h"
#include "hlsl/mip_pass_cs.h"
#include "hlsl/mip_end_cs.h"

// Describes one shader blob coming from the generated hlsl/*.h headers.
struct ShaderDesc
{
	const void* code; // start of the blob
	size_t size;      // initialized with ARRAY_LEN of the blob array
	const char* name; // human-readable name
};

// The 4 variants of the generic pixel shader:
// plain, A2C (alpha-to-coverage), dithered, dithered + A2C.
static ShaderDesc genericPixelShaders[4] = 
{
	{ g_generic_ps, ARRAY_LEN(g_generic_ps), "generic pixel shader" },
	{ g_generic_a_ps, ARRAY_LEN(g_generic_a_ps), "generic A2C pixel shader" },
	{ g_generic_d_ps, ARRAY_LEN(g_generic_d_ps), "generic dithered pixel shader" },
	{ g_generic_ad_ps, ARRAY_LEN(g_generic_ad_ps), "generic dithered A2C pixel shader" }
};

// NOTE(review): presumably the Windows headers define 'near' and 'far' as macros,
// which would clash with identifiers of the same name - confirm
#if defined(near)
#	undef near
#endif

#if defined(far)
#	undef far
#endif

// fallback definition for when the included headers don't provide this error code
#if !defined(D3DDDIERR_DEVICEREMOVED)
#	define D3DDDIERR_DEVICEREMOVED ((HRESULT)0x88760870L)
#endif

#define MAX_GPU_TEXTURE_SIZE 2048 // instead of D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION

// number of D3D11_BLEND values considered when indexing the blend state table
#define BLEND_STATE_COUNT (D3D11_BLEND_SRC_ALPHA_SAT + 1)

// a special addition used by the partial depth clear pass
#define GLS_DEPTHFUNC_ALWAYS 0x80000000


/*
Current info:
- feature level 10.1 minimum
- feature level 11.0 for mip-map generation with compute
- shader target 4.1 for graphics (SV_VertexID, unsized Texture2DMS)
- shader target 5.0 for compute  (typed UAVs)

Known issues:
- device re-creation isn't handled by OBS' capture plug-in
*/


// Alpha test modes, derived from the GLS_ATEST_* state bits by GetAlphaTest.
// The value is copied as a uint32_t into the pixel shader constant buffers.
enum AlphaTest
{
	AT_ALWAYS,
	AT_GREATER_THAN_0,
	AT_LESS_THAN_HALF,
	AT_GREATER_OR_EQUAL_TO_HALF
};

// Identifies a pipeline, used as index into Direct3D::pipelines.
// ApplyPipeline treats PID_SCREENSHOT as PID_POST_PROCESS with a different render target.
enum PipelineId
{
	PID_GENERIC,
	PID_SOFT_SPRITE,
	PID_DYNAMIC_LIGHT,
	PID_POST_PROCESS,
	PID_SCREENSHOT,
	PID_CLEAR,
	PID_COUNT
};

// Decides what Check and CheckAndName do with a failed HRESULT.
enum ErrorMode
{
	EM_FATAL, // raise a fatal error through ri.Error
	EM_SILENT // only report the failure through the return value
};

// Identifies a vertex attribute stream, used as index into Direct3D::vertexBuffers.
enum VertexBufferId
{
	VB_POSITION,
	VB_NORMAL,
	VB_TEXCOORD,
	VB_TEXCOORD2,
	VB_COLOR,
	VB_COUNT
};

// Texture filtering modes.
// Combined with the texture wrap mode to select a sampler state (see ComputeSamplerStateIndex).
enum TextureMode
{
	TM_BILINEAR,
	TM_ANISOTROPIC,
	TM_NEAREST,
	TM_COUNT
};

// Depth comparison functions, translated to D3D11_COMPARISON_FUNC by GetDepthComparison.
// Part of the depth-stencil state index (see ComputeDepthStencilStateIndex).
enum DepthFunc
{
	DF_LEQUAL,
	DF_EQUAL,
	DF_ALWAYS,
	DF_COUNT
};

// @NOTE: MSDN says "you must set the ByteWidth value of D3D11_BUFFER_DESC in multiples of 16"
// The "dummy" members below only exist to bring each struct's size to a multiple of 16 bytes.
#pragma pack(push, 16)

// vertex shader constants uploaded for PID_GENERIC
struct GenericVSData
{
	float modelViewMatrix[16];
	float projectionMatrix[16];
	float clipPlane[4];
};

// pixel shader constants uploaded for PID_GENERIC
struct GenericPSData
{
	uint32_t alphaTest; // AlphaTest enum
	uint32_t texEnv; // texEnv_t enum
	float seed[2];
	float invGamma;
	float invBrightness;
	float noiseScale;
	float alphaBoost;
};

// vertex shader constants uploaded for PID_SOFT_SPRITE
struct DepthFadeVSData
{
	float modelViewMatrix[16];
	float projectionMatrix[16];
	float clipPlane[4];
};

// pixel shader constants uploaded for PID_SOFT_SPRITE
struct DepthFadePSData
{
	uint32_t alphaTest; // AlphaTest enum
	float distance;
	float offset;
	float dummy;
	float scale[4];
	float bias[4];
};

// vertex shader constants uploaded for PID_DYNAMIC_LIGHT
struct DynamicLightVSData
{
	float modelViewMatrix[16];
	float projectionMatrix[16];
	float clipPlane[4];
	float osLightPos[4];
	float osEyePos[4];
};

// pixel shader constants uploaded for PID_DYNAMIC_LIGHT
struct DynamicLightPSData
{
	float lightColor[3];
	float lightRadius;
	float opaque;
	float intensity;
	float dummy[2];
};

// vertex shader constants uploaded for PID_POST_PROCESS
struct PostVSData
{
	float scaleX;
	float scaleY;
	float dummy[2];
};

// pixel shader constants uploaded for PID_POST_PROCESS
struct PostPSData
{
	float gamma;
	float brightness;
	float greyscale;
	float dummy;
};

// pixel shader constants uploaded for PID_CLEAR
struct ClearPSData
{
	float color[4];
};

// NOTE(review): the 3 structs below are presumably the constants of the
// mip-map generation compute shaders - their use is not visible in this part of the file
struct Down4CSData
{
	float weights[4];
	uint32_t maxSize[2];
	uint32_t scale[2];
	uint32_t offset[2];
	uint32_t clampMode; // 0 = repeat
	uint32_t dummy;
};

struct LinearToGammaCSData
{
	float blendColor[4];
	float intensity;
	float invGamma;
	float dummy[2];
};

struct GammaToLinearCSData
{
	float gamma;
	float dummy[3];
};

#pragma pack(pop)

// A 2D texture and the shader resource view used to bind it to pixel shaders.
// Both are (re-)created by CreateTexture.
struct Texture
{
	ID3D11Texture2D* texture;
	ID3D11ShaderResourceView* view;
};

// The shaders, input layout and constant buffers bound together by ApplyPipeline.
struct Pipeline
{
	ID3D11VertexShader* vertexShader;
	ID3D11PixelShader* pixelShader;
	ID3D11InputLayout* inputLayout; // can be NULL
	ID3D11Buffer* vertexBuffer; // can be NULL - constant buffer bound to vertex shader slot 0
	ID3D11Buffer* pixelBuffer; // can be NULL - constant buffer bound to pixel shader slot 0
};

// A texture with both a shader resource view and an unordered access view.
// See Direct3D::mipGenTextures for the instances.
struct MipGenTexture
{
	ID3D11Texture2D* texture;
	ID3D11ShaderResourceView* srv;
	ID3D11UnorderedAccessView* uav;
};

// A GPU buffer that AppendVertexData fills batch after batch.
// The buffer restarts at item 0 with a discard when a batch doesn't fit anymore.
struct VertexBuffer
{
	ID3D11Buffer* buffer;
	int itemSize;   // size of 1 item in bytes
	int capacity;   // in items
	int writeIndex; // in items - where the next batch gets written
	int readIndex;  // in items - where the last appended batch starts
	qbool discard;  // when set, the next append maps with D3D11_MAP_WRITE_DISCARD
};

// Memory figures of the graphics adapter, in megabytes as the names indicate.
// NOTE(review): the code filling this in is not visible here - presumably 'valid' is set once the query succeeded
struct AdapterInfo
{
	qbool valid;
	int dedicatedSystemMemoryMB;
	int dedicatedVideoMemoryMB;
	int sharedSystemMemoryMB;
};

// The set of queries issued for 1 frame.
// NOTE(review): presumably GPU timestamps for frame timing - the code using them is not visible here
struct FrameQueries
{
	ID3D11Query* disjoint;
	ID3D11Query* frameStart;
	ID3D11Query* frameEnd;
	qbool valid;
};

// All the back-end state that GAL_Init clears with ZeroMemory.
// The *Index members cache which state object was last bound to skip redundant API calls.
struct Direct3D
{
	// constant buffer data
	PostVSData postVSData;
	PostPSData postPSData;
	ClearPSData clearPSData;
	float modelViewMatrix[16];
	float projectionMatrix[16];
	float clipPlane[4];
	float osLightPos[4];
	float osEyePos[4];
	float lightColor[3];
	float lightRadius;
	AlphaTest alphaTest; // updated by ApplyState
	texEnv_t texEnv;
	float frameSeed[2]; // uploaded as GenericPSData::seed

	DXGI_FORMAT formatColorRT;
	DXGI_FORMAT formatDepth;     // float: DXGI_FORMAT_R32_TYPELESS
	DXGI_FORMAT formatDepthRTV;  // float: DXGI_FORMAT_R32_FLOAT
	DXGI_FORMAT formatDepthView; // float: DXGI_FORMAT_D32_FLOAT

	Texture textures[MAX_DRAWIMAGES]; // indexed by image_t::texnum
	int textureCount;

	ID3D11SamplerState* samplerStates[TW_COUNT * TM_COUNT]; // see ComputeSamplerStateIndex
	int samplerStateIndices[2]; // last index applied to each pixel shader sampler slot

	ID3D11BlendState* blendStates[2 * BLEND_STATE_COUNT * BLEND_STATE_COUNT]; // see ComputeBlendStateIndex
	int blendStateIndex;

	ID3D11DepthStencilState* depthStencilStates[2 * 2 * DF_COUNT]; // see ComputeDepthStencilStateIndex
	int depthStencilStateIndex;

	ID3D11RasterizerState* rasterStates[12]; // see ComputeRasterizerStateIndex
	int rasterStateIndex;

	ID3D11ShaderResourceView* pixelShaderResources[2]; // last view applied to each pixel shader resource slot

	Pipeline pipelines[PID_COUNT];
	PipelineId pipelineIndex;

	MipGenTexture mipGenTextures[3]; // 0,1=float16  2=uint8

	VertexBuffer vertexBuffers[VB_COUNT];
	VertexBuffer indexBuffer;

	// for the calls to IASetVertexBuffers
	VertexBufferId vbIds[VB_COUNT];
	ID3D11Buffer* vbBuffers[VB_COUNT];
	UINT vbStrides[VB_COUNT];
	int vbCount;
	qbool splitBufferOffsets; // when set, DrawIndexed re-binds the vertex buffers with per-buffer byte offsets

	ID3D11Texture2D* backBufferTexture;
	ID3D11RenderTargetView* backBufferRTView;
	ID3D11Texture2D* renderTargetTextureMS;
	ID3D11RenderTargetView* renderTargetViewMS;
	ID3D11Texture2D* resolveTexture;
	ID3D11ShaderResourceView* resolveTextureShaderView;
	ID3D11Texture2D* depthStencilTexture;
	ID3D11DepthStencilView* depthStencilView;
	ID3D11ShaderResourceView* depthStencilShaderView;
	ID3D11Texture2D* readbackTexture; // allowed to be NULL!
	ID3D11Texture2D* screenshotTexture; // allowed to be NULL!
	ID3D11RenderTargetView* screenshotTextureRTView; // allowed to be NULL!

	ID3D11ComputeShader* mipGammaToLinearComputeShader;
	ID3D11ComputeShader* mipLinearToGammaComputeShader;
	ID3D11ComputeShader* mipDownSampleComputeShader;
	ID3D11Buffer* mipDownSampleConstBuffer;
	ID3D11Buffer* mipLinearToGammaConstBuffer;
	ID3D11Buffer* mipGammaToLinearConstBuffer;

	FrameQueries frameQueries[32];
	int frameQueriesWriteIndex;
	int frameQueriesReadIndex;

	// cached when starting sky rendering
	float oldSkyClipPlane[4];
	D3D11_VIEWPORT oldSkyViewport;

	ErrorMode errorMode; // read by Check and CheckAndName
};

// The state that GAL_Init does not clear with the rest (see Direct3D).
// GAL_Init does a full initialization only when 'library' is still NULL.
struct Direct3DStatic
{
	ID3D11Device* device;
	ID3D11DeviceContext* context;
	IDXGISwapChain* swapChain;

	HMODULE library; // handle of D3D11.dll
	qbool flipAndTear;

	AdapterInfo adapterInfo;
};

// d3d is cleared by every GAL_Init call, d3ds is not
__declspec(align(16)) static Direct3D d3d;
__declspec(align(16)) static Direct3DStatic d3ds;


// COM_RELEASE releases a COM interface pointer (when not NULL) and resets it to NULL.
// COM_RELEASE_ARRAY does the same for every element of a fixed-size array.
// The arguments are parenthesized so that expressions like *ptr or obj.member expand correctly
// and the loop variable has a name that won't capture an 'i' used in the argument.
#define COM_RELEASE(p)			do { if(p) { (p)->Release(); (p) = NULL; } } while((void)0,0)
#define COM_RELEASE_ARRAY(a)	do { for(int comReleaseIdx_ = 0; comReleaseIdx_ < ARRAY_LEN(a); ++comReleaseIdx_) { COM_RELEASE((a)[comReleaseIdx_]); } } while((void)0,0)


// forward declaration: called by UpdateAnimatedImage
static void GAL_UpdateTexture(image_t* image, int mip, int x, int y, int w, int h, const void* data);


// Returns the system's description of an error code, or "???" when there is none.
// The string lives in a static buffer that gets overwritten by the next call.
static const char* GetSystemErrorString(HRESULT hr)
{
	// FormatMessage might not always give us the string we want but that's ok,
	// we always print the original error code anyhow
	static char systemErrorStr[1024];

	// FORMAT_MESSAGE_IGNORE_INSERTS is needed because we supply no argument array:
	// insert sequences such as %1 must be copied as they are instead of being expanded
	const DWORD written = FormatMessageA(
		FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, (DWORD)hr,
		MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
		systemErrorStr, sizeof(systemErrorStr) - 1, NULL);
	if(written == 0)
	{
		// we have nothing valid
		Q_strncpyz(systemErrorStr, "???", sizeof(systemErrorStr));
		return systemErrorStr;
	}

	// remove the trailing whitespace
	// we work with a length so that no pointer before the start of the buffer is ever formed
	size_t length = strlen(systemErrorStr);
	while(length > 0)
	{
		const char c = systemErrorStr[length - 1];
		if(c != '\r' && c != '\n' && c != '\t' && c != ' ')
		{
			break;
		}
		systemErrorStr[--length] = '\0';
	}

	return systemErrorStr;
}

// Tells whether a Direct3D/DXGI call succeeded.
// function: the name of the call, for the error message
// A failure raises a fatal error when d3d.errorMode is EM_FATAL.
static qbool Check(HRESULT hr, const char* function)
{
	const qbool success = SUCCEEDED(hr) ? qtrue : qfalse;
	if(!success && d3d.errorMode == EM_FATAL)
	{
		ri.Error(ERR_FATAL, "'%s' failed with code 0x%08X (%s)\n", function, (unsigned int)hr, GetSystemErrorString(hr));
	}

	return success;
}

// Tells whether a resource creation call succeeded.
// On success, the resource gets resourceName attached as its debug object name.
// A failure raises a fatal error when d3d.errorMode is EM_FATAL.
static qbool CheckAndName(HRESULT hr, const char* function, ID3D11DeviceChild* resource, const char* resourceName)
{
	if(FAILED(hr))
	{
		if(d3d.errorMode == EM_FATAL)
		{
			ri.Error(ERR_FATAL, "'%s' failed to create '%s' with code 0x%08X (%s)\n", function, resourceName, (unsigned int)hr, GetSystemErrorString(hr));
		}

		return qfalse;
	}

	resource->SetPrivateData(WKPDID_D3DDebugObjectName, strlen(resourceName), resourceName);

	return qtrue;
}

// Creates a render target view and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateRenderTargetView(ID3D11Resource* pResource, const D3D11_RENDER_TARGET_VIEW_DESC* pDesc, ID3D11RenderTargetView** ppRTView, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppRTView gets read
	const HRESULT result = device->CreateRenderTargetView(pResource, pDesc, ppRTView);

	return CheckAndName(result, "CreateRenderTargetView", *ppRTView, name);
}

// Creates a 2D texture and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateTexture2D(const D3D11_TEXTURE2D_DESC* pDesc, const D3D11_SUBRESOURCE_DATA* pInitialData, ID3D11Texture2D** ppTexture2D, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppTexture2D gets read
	const HRESULT result = device->CreateTexture2D(pDesc, pInitialData, ppTexture2D);

	return CheckAndName(result, "CreateTexture2D", *ppTexture2D, name);
}

// Creates a shader resource view and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateShaderResourceView(ID3D11Resource* pResource, const D3D11_SHADER_RESOURCE_VIEW_DESC* pDesc, ID3D11ShaderResourceView** ppSRView, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppSRView gets read
	const HRESULT result = device->CreateShaderResourceView(pResource, pDesc, ppSRView);

	return CheckAndName(result, "CreateShaderResourceView", *ppSRView, name);
}

// Creates an unordered access view and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateUnorderedAccessView(ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAView, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppUAView gets read
	const HRESULT result = device->CreateUnorderedAccessView(pResource, pDesc, ppUAView);

	return CheckAndName(result, "CreateUnorderedAccessView", *ppUAView, name);
}

// Creates a vertex shader and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateVertexShader(const void* pShaderBytecode, SIZE_T BytecodeLength, ID3D11ClassLinkage* pClassLinkage, ID3D11VertexShader** ppVertexShader, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppVertexShader gets read
	const HRESULT result = device->CreateVertexShader(pShaderBytecode, BytecodeLength, pClassLinkage, ppVertexShader);

	return CheckAndName(result, "CreateVertexShader", *ppVertexShader, name);
}

// Creates a pixel shader and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreatePixelShader(const void* pShaderBytecode, SIZE_T BytecodeLength, ID3D11ClassLinkage* pClassLinkage, ID3D11PixelShader** ppPixelShader, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppPixelShader gets read
	const HRESULT result = device->CreatePixelShader(pShaderBytecode, BytecodeLength, pClassLinkage, ppPixelShader);

	return CheckAndName(result, "CreatePixelShader", *ppPixelShader, name);
}

// Creates a compute shader and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateComputeShader(const void* pShaderBytecode, SIZE_T BytecodeLength, ID3D11ClassLinkage* pClassLinkage, ID3D11ComputeShader** ppComputeShader, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppComputeShader gets read
	const HRESULT result = device->CreateComputeShader(pShaderBytecode, BytecodeLength, pClassLinkage, ppComputeShader);

	return CheckAndName(result, "CreateComputeShader", *ppComputeShader, name);
}

// Creates a buffer and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateBuffer(const D3D11_BUFFER_DESC* pDesc, const D3D11_SUBRESOURCE_DATA* pInitialData, ID3D11Buffer** ppBuffer, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppBuffer gets read
	const HRESULT result = device->CreateBuffer(pDesc, pInitialData, ppBuffer);

	return CheckAndName(result, "CreateBuffer", *ppBuffer, name);
}

// Creates an input layout and gives it a debug name. Returns qtrue on success.
static qbool D3D11_CreateInputLayout(const D3D11_INPUT_ELEMENT_DESC* pInputElementDescs, UINT NumElements, const void* pShaderBytecodeWithInputSignature, SIZE_T BytecodeLength, ID3D11InputLayout** ppInputLayout, const char* name)
{
	ID3D11Device* const device = d3ds.device;

	// the creation must be done before *ppInputLayout gets read
	const HRESULT result = device->CreateInputLayout(pInputElementDescs, NumElements, pShaderBytecodeWithInputSignature, BytecodeLength, ppInputLayout);

	return CheckAndName(result, "CreateInputLayout", *ppInputLayout, name);
}

// Returns a short description of a device removal reason code.
// Unknown codes get formatted with va.
static const char* GetDeviceRemovedReasonString(HRESULT reason)
{
	const char* description = NULL;
	switch(reason)
	{
		case DXGI_ERROR_DEVICE_HUNG:
			description = "device hung";
			break;
		case DXGI_ERROR_DEVICE_REMOVED:
			description = "device removed";
			break;
		case DXGI_ERROR_DEVICE_RESET:
			description = "device reset";
			break;
		case DXGI_ERROR_DRIVER_INTERNAL_ERROR:
			description = "internal driver error";
			break;
		case DXGI_ERROR_INVALID_CALL:
			description = "invalid call";
			break;
		case S_OK:
			description = "no error";
			break;
		default:
			description = va("unknown error code 0x%08X", (unsigned int)reason);
			break;
	}

	return description;
}

// Extracts the alpha test mode from the GLS_* state bits.
// Anything that isn't a known alpha test value means no alpha test.
static AlphaTest GetAlphaTest(unsigned int stateBits)
{
	const unsigned int alphaTestBits = stateBits & GLS_ATEST_BITS;
	if(alphaTestBits == GLS_ATEST_GT_0)
	{
		return AT_GREATER_THAN_0;
	}
	if(alphaTestBits == GLS_ATEST_LT_80)
	{
		return AT_LESS_THAN_HALF;
	}
	if(alphaTestBits == GLS_ATEST_GE_80)
	{
		return AT_GREATER_OR_EQUAL_TO_HALF;
	}

	return AT_ALWAYS;
}

// Extracts the depth function from the GLS_* state bits.
// GLS_DEPTHFUNC_ALWAYS wins over GLS_DEPTHFUNC_EQUAL, the default is less-or-equal.
static DepthFunc GetDepthFunc(unsigned int stateBits)
{
	const bool always = (stateBits & GLS_DEPTHFUNC_ALWAYS) != 0;
	const bool equal = (stateBits & GLS_DEPTHFUNC_EQUAL) != 0;

	return always ? DF_ALWAYS : (equal ? DF_EQUAL : DF_LEQUAL);
}

// Translates our depth function into Direct3D's comparison function.
static D3D11_COMPARISON_FUNC GetDepthComparison(DepthFunc depthFunc)
{
	if(depthFunc == DF_ALWAYS)
	{
		return D3D11_COMPARISON_ALWAYS;
	}
	if(depthFunc == DF_EQUAL)
	{
		return D3D11_COMPARISON_EQUAL;
	}

	// DF_LEQUAL and anything else
	return D3D11_COMPARISON_LESS_EQUAL;
}

// Translates the renderer's wrap mode into Direct3D's texture address mode.
// Everything but TW_REPEAT clamps.
static D3D11_TEXTURE_ADDRESS_MODE GetTextureAddressMode(textureWrap_t wrap)
{
	return wrap == TW_REPEAT ? D3D11_TEXTURE_ADDRESS_WRAP : D3D11_TEXTURE_ADDRESS_CLAMP;
}

// Translates the renderer's texture format into a DXGI format.
// For now, every format maps to 8-bit RGBA.
static DXGI_FORMAT GetTextureFormat(textureFormat_t f)
{
	DXGI_FORMAT format;
	switch(f)
	{
		case TF_RGBA8:
		default:
			format = DXGI_FORMAT_R8G8B8A8_UNORM;
			break;
	}

	return format;
}

// Translates the renderer's cull type into Direct3D's cull mode.
// Two-sided and unknown cull types disable culling.
static D3D11_CULL_MODE GetCullMode(cullType_t t)
{
	if(t == CT_BACK_SIDED)
	{
		return D3D11_CULL_BACK;
	}
	if(t == CT_FRONT_SIDED)
	{
		return D3D11_CULL_FRONT;
	}

	return D3D11_CULL_NONE;
}

// Extracts the source blend factor from the GLS_* state bits.
// Unknown values map to D3D11_BLEND_ONE.
static D3D11_BLEND GetSourceBlend(unsigned int stateBits)
{
	D3D11_BLEND blend = D3D11_BLEND_ONE;
	switch(stateBits & GLS_SRCBLEND_BITS)
	{
		case GLS_SRCBLEND_ZERO:                blend = D3D11_BLEND_ZERO; break;
		case GLS_SRCBLEND_ONE:                 blend = D3D11_BLEND_ONE; break;
		case GLS_SRCBLEND_DST_COLOR:           blend = D3D11_BLEND_DEST_COLOR; break;
		case GLS_SRCBLEND_ONE_MINUS_DST_COLOR: blend = D3D11_BLEND_INV_DEST_COLOR; break;
		case GLS_SRCBLEND_SRC_ALPHA:           blend = D3D11_BLEND_SRC_ALPHA; break;
		case GLS_SRCBLEND_ONE_MINUS_SRC_ALPHA: blend = D3D11_BLEND_INV_SRC_ALPHA; break;
		case GLS_SRCBLEND_DST_ALPHA:           blend = D3D11_BLEND_DEST_ALPHA; break;
		case GLS_SRCBLEND_ONE_MINUS_DST_ALPHA: blend = D3D11_BLEND_INV_DEST_ALPHA; break;
		case GLS_SRCBLEND_ALPHA_SATURATE:      blend = D3D11_BLEND_SRC_ALPHA_SAT; break;
		default: break;
	}

	return blend;
}

// Extracts the destination blend factor from the GLS_* state bits.
// Unknown values map to D3D11_BLEND_ONE.
static D3D11_BLEND GetDestinationBlend(unsigned int stateBits)
{
	D3D11_BLEND blend = D3D11_BLEND_ONE;
	switch(stateBits & GLS_DSTBLEND_BITS)
	{
		case GLS_DSTBLEND_ZERO:                blend = D3D11_BLEND_ZERO; break;
		case GLS_DSTBLEND_ONE:                 blend = D3D11_BLEND_ONE; break;
		case GLS_DSTBLEND_SRC_COLOR:           blend = D3D11_BLEND_SRC_COLOR; break;
		case GLS_DSTBLEND_ONE_MINUS_SRC_COLOR: blend = D3D11_BLEND_INV_SRC_COLOR; break;
		case GLS_DSTBLEND_SRC_ALPHA:           blend = D3D11_BLEND_SRC_ALPHA; break;
		case GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA: blend = D3D11_BLEND_INV_SRC_ALPHA; break;
		case GLS_DSTBLEND_DST_ALPHA:           blend = D3D11_BLEND_DEST_ALPHA; break;
		case GLS_DSTBLEND_ONE_MINUS_DST_ALPHA: blend = D3D11_BLEND_INV_DEST_ALPHA; break;
		default: break;
	}

	return blend;
}

// Translates a RTCF_* render target color format into a DXGI format.
// RTCF_R8G8B8A8 and unknown values map to 8-bit RGBA.
static DXGI_FORMAT GetRenderTargetColorFormat(int format)
{
	if(format == RTCF_R10G10B10A2)
	{
		return DXGI_FORMAT_R10G10B10A2_UNORM;
	}
	if(format == RTCF_R16G16B16A16)
	{
		return DXGI_FORMAT_R16G16B16A16_UNORM;
	}

	return DXGI_FORMAT_R8G8B8A8_UNORM;
}

// Replaces the entire content of a constant buffer.
// buffer: the resource to map with D3D11_MAP_WRITE_DISCARD
// data, bytes: the new content
// When mapping fails without raising a fatal error (EM_SILENT), nothing gets written.
static void ResetShaderData(ID3D11Resource* buffer, const void* data, size_t bytes)
{
	D3D11_MAPPED_SUBRESOURCE ms;
	const HRESULT hr = d3ds.context->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms);
	if(!Check(hr, "Map on shader data"))
	{
		// ms wasn't filled in and the resource isn't mapped:
		// we can neither write to it nor unmap it
		return;
	}

	memcpy(ms.pData, data, bytes);
	d3ds.context->Unmap(buffer, 0);
}

// Reserves room for itemCount items in the buffer and copies data into it.
// data: can be NULL, in which case the room is reserved but nothing is copied
// The buffer restarts at item 0 with a discard when requested or when the items don't fit.
// After the call, readIndex is the index of the first item of this batch.
// When mapping fails without raising a fatal error (EM_SILENT), the copy is skipped
// but the indices still advance.
static void AppendVertexData(VertexBuffer* buffer, const void* data, int itemCount)
{
	D3D11_MAP mapType = D3D11_MAP_WRITE_NO_OVERWRITE;
	if(buffer->discard || buffer->writeIndex + itemCount > buffer->capacity)
	{
		buffer->discard = qfalse;
		buffer->writeIndex = 0;
		mapType = D3D11_MAP_WRITE_DISCARD;
	}

	// a discard needs the map call even when there is nothing to copy
	if(data != NULL || mapType == D3D11_MAP_WRITE_DISCARD)
	{
		D3D11_MAPPED_SUBRESOURCE ms;
		const HRESULT hr = d3ds.context->Map(buffer->buffer, 0, mapType, 0, &ms);
		if(Check(hr, "Map on vertex data"))
		{
			if(data != NULL)
			{
				memcpy((byte*)ms.pData + buffer->writeIndex * buffer->itemSize, data, itemCount * buffer->itemSize);
			}
			d3ds.context->Unmap(buffer->buffer, 0);
		}
	}

	buffer->readIndex = buffer->writeIndex;
	buffer->writeIndex += itemCount;
}

// Appends vertexCount items to each of the vertex buffers.
// data: 1 source pointer per VertexBufferId, each can be NULL (see AppendVertexData)
static void AppendVertexDataGroup(const void* data[VB_COUNT], int vertexCount)
{
	int id = 0;
	while(id < VB_COUNT)
	{
		VertexBuffer* const vb = d3d.vertexBuffers + id;
		AppendVertexData(vb, data[id], vertexCount);
		++id;
	}
}

// Uploads the constant buffer data of the currently applied pipeline.
// The structs are zero-initialized so that the padding members ("dummy")
// never get uploaded with indeterminate values.
// Does nothing for an invalid pipeline index or a pipeline without data (PID_SCREENSHOT).
static void UploadPendingShaderData()
{
	if((unsigned)d3d.pipelineIndex >= PID_COUNT)
	{
		return;
	}

	const PipelineId pid = d3d.pipelineIndex;
	Pipeline* const pipeline = &d3d.pipelines[pid];

	switch(pid)
	{
		case PID_GENERIC:
		{
			GenericVSData vsData = {};
			GenericPSData psData = {};
			memcpy(vsData.modelViewMatrix, d3d.modelViewMatrix, sizeof(vsData.modelViewMatrix));
			memcpy(vsData.projectionMatrix, d3d.projectionMatrix, sizeof(vsData.projectionMatrix));
			memcpy(vsData.clipPlane, d3d.clipPlane, sizeof(vsData.clipPlane));
			psData.alphaTest = d3d.alphaTest;
			psData.texEnv = d3d.texEnv;
			psData.seed[0] = d3d.frameSeed[0];
			psData.seed[1] = d3d.frameSeed[1];
			psData.invGamma = 1.0f / r_gamma->value;
			psData.invBrightness = 1.0f / r_brightness->value;
			// no dithering for 2D rendering
			psData.noiseScale = backEnd.projection2D ? 0.0f : r_ditherStrength->value;
			psData.alphaBoost = r_alphaToCoverageMipBoost->value;
			ResetShaderData(pipeline->vertexBuffer, &vsData, sizeof(vsData));
			ResetShaderData(pipeline->pixelBuffer, &psData, sizeof(psData));
			break;
		}

		case PID_SOFT_SPRITE:
		{
			DepthFadeVSData vsData = {};
			DepthFadePSData psData = {};
			memcpy(vsData.modelViewMatrix, d3d.modelViewMatrix, sizeof(vsData.modelViewMatrix));
			memcpy(vsData.projectionMatrix, d3d.projectionMatrix, sizeof(vsData.projectionMatrix));
			memcpy(vsData.clipPlane, d3d.clipPlane, sizeof(vsData.clipPlane));
			psData.alphaTest = d3d.alphaTest;
			memcpy(psData.scale, r_depthFadeScale[tess.shader->dfType], sizeof(psData.scale));
			memcpy(psData.bias, r_depthFadeBias[tess.shader->dfType], sizeof(psData.bias));
			psData.distance = tess.shader->dfInvDist;
			psData.offset = tess.shader->dfBias;
			ResetShaderData(pipeline->vertexBuffer, &vsData, sizeof(vsData));
			ResetShaderData(pipeline->pixelBuffer, &psData, sizeof(psData));
			break;
		}

		case PID_DYNAMIC_LIGHT:
		{
			DynamicLightVSData vsData = {};
			DynamicLightPSData psData = {};
			memcpy(vsData.modelViewMatrix, d3d.modelViewMatrix, sizeof(vsData.modelViewMatrix));
			memcpy(vsData.projectionMatrix, d3d.projectionMatrix, sizeof(vsData.projectionMatrix));
			memcpy(vsData.clipPlane, d3d.clipPlane, sizeof(vsData.clipPlane));
			memcpy(vsData.osEyePos, d3d.osEyePos, sizeof(vsData.osEyePos));
			memcpy(vsData.osLightPos, d3d.osLightPos, sizeof(vsData.osLightPos));
			memcpy(psData.lightColor, d3d.lightColor, sizeof(psData.lightColor));
			psData.lightRadius = d3d.lightRadius;
			psData.opaque = backEnd.dlOpaque ? 1.0f : 0.0f;
			psData.intensity = backEnd.dlIntensity;
			ResetShaderData(pipeline->vertexBuffer, &vsData, sizeof(vsData));
			ResetShaderData(pipeline->pixelBuffer, &psData, sizeof(psData));
			break;
		}

		case PID_POST_PROCESS:
			ResetShaderData(pipeline->vertexBuffer, &d3d.postVSData, sizeof(d3d.postVSData));
			ResetShaderData(pipeline->pixelBuffer, &d3d.postPSData, sizeof(d3d.postPSData));
			break;

		case PID_CLEAR:
			// this pipeline only has pixel shader data
			ResetShaderData(pipeline->pixelBuffer, &d3d.clearPSData, sizeof(d3d.clearPSData));
			break;

		default:
			break;
	}
}

// Computes the index into d3d.samplerStates for a wrap mode / filter mode pair.
// The sampler states are grouped by texture mode, TW_COUNT entries per group.
static int ComputeSamplerStateIndex(int textureWrap, int textureMode)
{
	const int groupStart = textureMode * TW_COUNT;

	return groupStart + textureWrap;
}

// Binds the matching sampler state to a pixel shader sampler slot,
// unless it's the one that was last bound to that slot.
static void ApplySamplerState(UINT slot, textureWrap_t textureWrap, TextureMode textureMode)
{
	const int newIndex = ComputeSamplerStateIndex(textureWrap, textureMode);
	int* const cachedIndex = &d3d.samplerStateIndices[slot];
	if(*cachedIndex != newIndex)
	{
		d3ds.context->PSSetSamplers(slot, 1, &d3d.samplerStates[newIndex]);
		*cachedIndex = newIndex;
	}
}

// Binds a shader resource view to a pixel shader resource slot,
// unless it's the one that was last bound to that slot.
static void ApplyPixelShaderResource(UINT slot, ID3D11ShaderResourceView* srv)
{
	ID3D11ShaderResourceView** const cachedView = &d3d.pixelShaderResources[slot];
	if(*cachedView != srv)
	{
		d3ds.context->PSSetShaderResources(slot, 1, &srv);
		*cachedView = srv;
	}
}

static void DrawIndexed(int indexCount)
{
	if(d3d.splitBufferOffsets)
	{
		UINT offsets[VB_COUNT];
		for(int i = 0; i < d3d.vbCount; ++i)
		{
			VertexBuffer* const vb = &d3d.vertexBuffers[d3d.vbIds[i]];
			offsets[i] = vb->readIndex * vb->itemSize; // in bytes, not vertices
		}

		d3ds.context->IASetVertexBuffers(0, d3d.vbCount, d3d.vbBuffers, d3d.vbStrides, offsets);
		d3ds.context->DrawIndexed(indexCount, d3d.indexBuffer.readIndex, 0);
	}
	else
	{
		d3ds.context->DrawIndexed(indexCount, d3d.indexBuffer.readIndex, d3d.vertexBuffers[VB_POSITION].readIndex);
	}
}

// Makes the given pipeline current: input layout, vertex buffers, shaders,
// constant buffers and render targets.
// Does nothing when the pipeline is already current or the index is invalid.
// PID_SCREENSHOT uses the resources of PID_POST_PROCESS but renders to the screenshot texture.
// NOTE(review): d3d.pipelineIndex receives the fixed-up index, so after PID_SCREENSHOT
// a request for PID_POST_PROCESS returns early and leaves the screenshot render target bound -
// confirm the callers never rely on that sequence
static void ApplyPipeline(PipelineId index)
{
	if(index == d3d.pipelineIndex || (unsigned)index >= PID_COUNT)
	{
		return;
	}

	// unfixedIndex decides the render targets, index decides everything else
	const PipelineId unfixedIndex = index;
	if(index == PID_SCREENSHOT)
	{
		index = PID_POST_PROCESS;
	}

	Pipeline* const pipeline = &d3d.pipelines[index];
	if(pipeline->inputLayout)
	{
		d3ds.context->IASetInputLayout(pipeline->inputLayout);

		// the vertex buffers to bind, in input slot order
		int count = 0;
		VertexBufferId* const ids = d3d.vbIds;
		if(index == PID_GENERIC)
		{
			ids[count++] = VB_POSITION;
			ids[count++] = VB_COLOR;
			ids[count++] = VB_TEXCOORD;
			ids[count++] = VB_TEXCOORD2;
		}
		else if(index == PID_SOFT_SPRITE)
		{
			ids[count++] = VB_POSITION;
			ids[count++] = VB_COLOR;
			ids[count++] = VB_TEXCOORD;
		}
		else if(index == PID_DYNAMIC_LIGHT)
		{
			ids[count++] = VB_POSITION;
			ids[count++] = VB_NORMAL;
			ids[count++] = VB_COLOR;
			ids[count++] = VB_TEXCOORD;
		}
		d3d.vbCount = count;

		for(int i = 0; i < count; ++i)
		{
			VertexBuffer* const vb = &d3d.vertexBuffers[ids[i]];
			d3d.vbBuffers[i] = vb->buffer;
			d3d.vbStrides[i] = vb->itemSize;
		}

		// with split offsets, DrawIndexed binds the buffers itself for every draw call
		if(!d3d.splitBufferOffsets)
		{
			UINT offsets[VB_COUNT] = { 0 };
			d3ds.context->IASetVertexBuffers(0, count, d3d.vbBuffers, d3d.vbStrides, offsets);
		}
	}
	else
	{
		d3ds.context->IASetInputLayout(NULL);
		d3ds.context->IASetVertexBuffers(0, 0, NULL, NULL, NULL);
		d3d.vbCount = 0;
	}

	d3ds.context->VSSetShader(pipeline->vertexShader, NULL, 0);
	d3ds.context->PSSetShader(pipeline->pixelShader, NULL, 0);

	if(pipeline->vertexBuffer)
	{
		d3ds.context->VSSetConstantBuffers(0, 1, &pipeline->vertexBuffer);
	}
	if(pipeline->pixelBuffer)
	{
		d3ds.context->PSSetConstantBuffers(0, 1, &pipeline->pixelBuffer);
	}

	if(unfixedIndex == PID_POST_PROCESS)
	{
		d3ds.context->OMSetRenderTargets(1, &d3d.backBufferRTView, NULL);
	}
	else if(unfixedIndex == PID_SCREENSHOT)
	{
		d3ds.context->OMSetRenderTargets(1, &d3d.screenshotTextureRTView, NULL);
	}
	else if(unfixedIndex == PID_SOFT_SPRITE)
	{
		// the depth buffer is unbound as a depth target first, then bound as a pixel shader input
		d3ds.context->OMSetRenderTargets(1, &d3d.renderTargetViewMS, NULL);
		ApplyPixelShaderResource(1, d3d.depthStencilShaderView);
		ApplySamplerState(1, TW_CLAMP_TO_EDGE, TM_BILINEAR);
	}
	else
	{
		// keep this call order to make sure the depth buffer isn't bound as a SRV anymore
		// when we set it as a render target
		ApplyPixelShaderResource(1, d3d.textures[0].view);
		d3ds.context->OMSetRenderTargets(1, &d3d.renderTargetViewMS, d3d.depthStencilView);
	}

	d3d.pipelineIndex = index;
}

static void ApplyViewport(int x, int y, int w, int h, int th)
{
	const int top = th - y - h;

	D3D11_VIEWPORT vp;
	vp.TopLeftX = x;
	vp.TopLeftY = top;
	vp.Width = w;
	vp.Height = h;
	vp.MinDepth = 0.0f;
	vp.MaxDepth = 1.0f;
	d3ds.context->RSSetViewports(1, &vp);
}

static void ApplyScissor(int x, int y, int w, int h, int th)
{
	const int top = th - y - h;
	const int bottom = th - y;

	D3D11_RECT sr;
	sr.left = x;
	sr.top = top;
	sr.right = x + w;
	sr.bottom = bottom;
	d3ds.context->RSSetScissorRects(1, &sr);
}

// Sets both the viewport and the scissor rectangle to the same area.
// See ApplyViewport and ApplyScissor for the coordinate convention.
static void ApplyViewportAndScissor(int x, int y, int w, int h, int th)
{
	// viewport first, scissor second
	ApplyViewport(x, y, w, h, th);
	ApplyScissor(x, y, w, h, th);
}

// (Re-)creates the GPU texture and shader resource view of an image.
// texture: receives the new resources, the old ones are released first
// image: supplies the format and the debug name
// mipCount, w, h: mip level count and dimensions of the new texture
// When a creation fails without raising a fatal error (EM_SILENT),
// the corresponding pointer is left NULL instead of uninitialized.
static void CreateTexture(Texture* texture, image_t* image, int mipCount, int w, int h)
{
	COM_RELEASE(texture->texture);
	COM_RELEASE(texture->view);

	ID3D11Texture2D* texture2D = NULL;
	D3D11_TEXTURE2D_DESC texDesc;
	ZeroMemory(&texDesc, sizeof(texDesc));
	texDesc.ArraySize = 1;
	texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
	texDesc.CPUAccessFlags = 0;
	texDesc.Format = GetTextureFormat(image->format);
	texDesc.Usage = D3D11_USAGE_DEFAULT;
	texDesc.Width = w;
	texDesc.Height = h;
	texDesc.MipLevels = mipCount;
	texDesc.SampleDesc.Count = 1;
	texDesc.SampleDesc.Quality = 0;
	const qbool textureCreated = D3D11_CreateTexture2D(&texDesc, NULL, &texture2D, image->name);

	ID3D11ShaderResourceView* view = NULL;
	if(textureCreated)
	{
		D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc;
		ZeroMemory(&viewDesc, sizeof(viewDesc));
		viewDesc.Format = texDesc.Format;
		viewDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
		viewDesc.Texture2D.MipLevels = UINT(-1); // all the mip levels
		viewDesc.Texture2D.MostDetailedMip = 0;
		D3D11_CreateShaderResourceView(texture2D, &viewDesc, &view, image->name);
	}

	texture->texture = texture2D;
	texture->view = view;
}

// Callback handed to R_UpdateAndGetBundleImage (see GetBundleImage).
// Re-creates the texture when the dimensions changed
// and uploads the pixels when the dimensions changed or the data is dirty.
static void UpdateAnimatedImage(image_t* image, int w, int h, const byte* data, qbool dirty)
{
	const bool resized = image->width != w || image->height != h;
	if(resized)
	{
		image->width = w;
		image->height = h;
		CreateTexture(&d3d.textures[image->texnum], image, 1, w, h);
	}

	if(resized || dirty)
	{
		GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
	}
}

// Returns the image to use for a texture bundle.
// R_UpdateAndGetBundleImage gets UpdateAnimatedImage as its update callback.
static const image_t* GetBundleImage(const textureBundle_t* bundle)
{
	const image_t* const image = R_UpdateAndGetBundleImage(bundle, &UpdateAnimatedImage);

	return image;
}

// Computes the index into d3d.blendStates for a source / destination blend pair.
// The states with alpha-to-coverage enabled make up the second half of the table.
static int ComputeBlendStateIndex(int srcBlend, int dstBlend, qbool alphaToCoverage)
{
	const int row = alphaToCoverage * BLEND_STATE_COUNT + srcBlend;

	return row * BLEND_STATE_COUNT + dstBlend;
}

// Binds the blend state matching the given factors, unless it's already the current one.
// Raises a fatal error when the combination is out of range or has no blend state object.
static void ApplyBlendState(D3D11_BLEND srcBlend, D3D11_BLEND dstBlend, qbool alphaToCoverage)
{
	const int newIndex = ComputeBlendStateIndex(srcBlend, dstBlend, alphaToCoverage);

	if((unsigned)newIndex >= ARRAY_LEN(d3d.blendStates))
	{
		ri.Error(ERR_FATAL, "Tried to set an invalid blend state combo!");
	}

	ID3D11BlendState* const blendState = d3d.blendStates[newIndex];
	if(blendState == NULL)
	{
		ri.Error(ERR_FATAL, "Tried to set an unregistered blend state!");
	}

	if(newIndex != d3d.blendStateIndex)
	{
		d3ds.context->OMSetBlendState(blendState, NULL, 0xFFFFFFFF);
		d3d.blendStateIndex = newIndex;
	}
}

// Computes the index into d3d.depthStencilStates.
// From most to least significant: depth test disabled, depth function, depth writes enabled.
static int ComputeDepthStencilStateIndex(int disableDepth, int depthFunc, int maskTrue)
{
	const int funcIndex = disableDepth * DF_COUNT + depthFunc;

	return funcIndex * 2 + maskTrue;
}

// Binds the depth-stencil state matching the arguments, unless it's already the current one.
static void ApplyDepthStencilState(qbool disableDepth, DepthFunc depthFunc, qbool maskTrue)
{
	const int newIndex = ComputeDepthStencilStateIndex(disableDepth, (int)depthFunc, maskTrue);
	if(newIndex != d3d.depthStencilStateIndex)
	{
		d3d.depthStencilStateIndex = newIndex;
		d3ds.context->OMSetDepthStencilState(d3d.depthStencilStates[newIndex], 0);
	}
}

// Computes the index into d3d.rasterStates.
// From most to least significant: cull type, wireframe, polygon offset.
static int ComputeRasterizerStateIndex(int wireFrame, int cullType, int polygonOffset)
{
	const int modeIndex = cullType * 2 + wireFrame;

	return modeIndex * 2 + polygonOffset;
}

// Binds the rasterizer state matching the arguments, unless it's already the current one.
static void ApplyRasterizerState(qbool wireFrame, cullType_t cullType, qbool polygonOffset)
{
	const int newIndex = ComputeRasterizerStateIndex(wireFrame, cullType, polygonOffset);
	if(newIndex != d3d.rasterStateIndex)
	{
		d3d.rasterStateIndex = newIndex;
		d3ds.context->RSSetState(d3d.rasterStates[newIndex]);
	}
}

static void ApplyState(unsigned int stateBits, cullType_t cullType, qbool polygonOffset)
{
	static unsigned int oldStateBits = 0;

	const unsigned int diffBits = oldStateBits ^ stateBits;
	oldStateBits = stateBits;

	d3d.alphaTest = GetAlphaTest(stateBits);

	if(diffBits & (GLS_SRCBLEND_BITS | GLS_DSTBLEND_BITS | GLS_ATEST_BITS))
	{
		const D3D11_BLEND srcBlend = (stateBits & GLS_SRCBLEND_BITS) ? GetSourceBlend(stateBits) : D3D11_BLEND_ONE;
		const D3D11_BLEND dstBlend = (stateBits & GLS_DSTBLEND_BITS) ? GetDestinationBlend(stateBits) : D3D11_BLEND_ZERO;
		ApplyBlendState(srcBlend, dstBlend, glInfo.alphaToCoverageSupport && d3d.pipelineIndex == PID_GENERIC && d3d.alphaTest != AT_ALWAYS);
	}

	const qbool disableDepth = (stateBits & GLS_DEPTHTEST_DISABLE) ? 1 : 0;
	const DepthFunc depthFunc = GetDepthFunc(stateBits);
	const qbool maskTrue = (stateBits & GLS_DEPTHMASK_TRUE) ? 1 : 0;
	ApplyDepthStencilState(disableDepth, depthFunc, maskTrue);

	// fix up the cull mode for mirrors
	if(backEnd.viewParms.isMirror)
	{
		if(cullType == CT_BACK_SIDED)
		{
			cullType = CT_FRONT_SIDED;
		}
		else if(cullType == CT_FRONT_SIDED)
		{
			cullType = CT_BACK_SIDED;
		}
	}
	ApplyRasterizerState((stateBits & GLS_POLYMODE_LINE) != 0, cullType, polygonOffset);
}

// Binds an image's texture view and a matching sampler state to a pixel shader slot.
// Filtering is anisotropic by default.
// It is nearest when r_textureMode is "GL_NEAREST", we're not rendering in 2D
// and the image has none of the IMG_LMATLAS, IMG_EXTLMATLAS and IMG_NOPICMIP flags.
// Otherwise, it is bilinear when the image has the IMG_NOAF flag.
static void BindImage(UINT slot, const image_t* image)
{
	ApplyPixelShaderResource(slot, d3d.textures[image->texnum].view);

	const int nearestExcludedFlags = IMG_LMATLAS | IMG_EXTLMATLAS | IMG_NOPICMIP;
	const bool useNearest =
		Q_stricmp(r_textureMode->string, "GL_NEAREST") == 0 &&
		!backEnd.projection2D &&
		(image->flags & nearestExcludedFlags) == 0;

	TextureMode filter;
	if(useNearest)
	{
		filter = TM_NEAREST;
	}
	else if((image->flags & IMG_NOAF) != 0)
	{
		filter = TM_BILINEAR;
	}
	else
	{
		filter = TM_ANISOTROPIC;
	}

	ApplySamplerState(slot, image->wrapClampMode, filter);
}

// Binds the current image of a texture bundle to a pixel shader slot.
static void BindBundle(UINT slot, const textureBundle_t* bundle)
{
	const image_t* const image = GetBundleImage(bundle);
	BindImage(slot, image);
}

// Finds the highest sample count, no higher than r_msaa, that the device supports
// for both the color render target format and the depth format.
// sampleDesc: receives the result - a count of 1 means no multi-sampling
// Multi-sampling is not used at all when r_colorMipLevels is enabled.
static void FindBestAvailableAA(DXGI_SAMPLE_DESC* sampleDesc)
{
	// @NOTE: D3D10_MAX_MULTISAMPLE_SAMPLE_COUNT == D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT
	// clamp before converting to unsigned:
	// a negative value would turn into a huge count and stall the search loop below
	int requestedCount = r_msaa->integer;
	if(requestedCount < 0)
	{
		requestedCount = 0;
	}
	if(requestedCount > D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT)
	{
		requestedCount = D3D11_MAX_MULTISAMPLE_SAMPLE_COUNT;
	}

	if(r_colorMipLevels->integer)
	{
		requestedCount = 0;
	}

	sampleDesc->Count = (UINT)requestedCount;
	sampleDesc->Quality = 0;

	// go down from the requested count until both formats are supported
	while(sampleDesc->Count > 0)
	{
		UINT levelCount = 0;
		if(SUCCEEDED(d3ds.device->CheckMultisampleQualityLevels(d3d.formatColorRT, sampleDesc->Count, &levelCount)) &&
		   levelCount > 0 &&
		   SUCCEEDED(d3ds.device->CheckMultisampleQualityLevels(d3d.formatDepth, sampleDesc->Count, &levelCount)) &&
		   levelCount > 0)
		{
			break;
		}

		--sampleDesc->Count;
	}

	if(sampleDesc->Count <= 1)
	{
		sampleDesc->Count = 1;
		sampleDesc->Quality = 0;
	}
}

// Returns qtrue when the flip-discard present mode was requested through r_d3d11_presentMode
// and DXGI reports DXGI_FEATURE_PRESENT_ALLOW_TEARING as supported.
// DXGI.dll is loaded and a temporary IDXGIFactory5 is created just for this query;
// both are released before returning. Every failure prints a warning and yields qfalse.
static qbool CheckFlipAndTearSupport()
{
	typedef HRESULT (WINAPI *CreateFactoryFn)(REFIID riid, _Out_ void **ppFactory);

	if(r_d3d11_presentMode->integer != DXGIPM_FLIPDISCARD)
		return qfalse;

	const HMODULE dxgiModule = LoadLibraryA("DXGI.dll");
	if(dxgiModule == NULL)
	{
		ri.Printf(PRINT_WARNING, "CheckTearingSupport: DXGI.dll couldn't be found or opened\n");
		return qfalse;
	}

	const CreateFactoryFn createFactory = (CreateFactoryFn)GetProcAddress(dxgiModule, "CreateDXGIFactory");
	if(createFactory == NULL)
	{
		FreeLibrary(dxgiModule);
		ri.Printf(PRINT_WARNING, "CheckTearingSupport: Failed to locate CreateDXGIFactory in DXGI.dll\n");
		return qfalse;
	}

	IDXGIFactory5* factory = NULL;
	const HRESULT createResult = createFactory(__uuidof(IDXGIFactory5), (void**)&factory);
	if(FAILED(createResult))
	{
		FreeLibrary(dxgiModule);
		ri.Printf(PRINT_WARNING, "CheckTearingSupport: 'CreateDXGIFactory' failed with code 0x%08X (%s)\n", (unsigned int)createResult, GetSystemErrorString(createResult));
		return qfalse;
	}

	BOOL tearingAllowed = FALSE;
	const HRESULT queryResult = factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, &tearingAllowed, sizeof(tearingAllowed));

	// the factory and the module are no longer needed, whatever the outcome of the query
	factory->Release();
	FreeLibrary(dxgiModule);

	if(FAILED(queryResult))
	{
		ri.Printf(PRINT_WARNING, "CheckTearingSupport: 'IDXGIFactory5::CheckFeatureSupport' failed with code 0x%08X (%s)\n", (unsigned int)queryResult, GetSystemErrorString(queryResult));
		return qfalse;
	}

	return tearingAllowed != 0;
}

// Initializes the Direct3D 11 back-end.
//
// When d3ds.library is NULL (first call, "full init"), D3D11.dll is loaded and
// the device, immediate context and swap chain are created and stored in 'd3ds'.
// Otherwise those objects are reused and only the swap chain description is queried.
// In both cases, 'd3d' is zeroed and every resource it holds is (re-)created:
// render targets, depth buffer, shaders, input layouts, geometry/constant buffers
// and the full sets of sampler/blend/depth-stencil/rasterizer states.
//
// Returns qtrue (there is no other return statement).
// NOTE(review): creation failures outside of the EM_SILENT sections are presumably
// handled as fatal errors by Check/CheckAndName and the D3D11_Create* helpers - confirm.
static qbool GAL_Init()
{
	Sys_V_Init(GAL_D3D11);

	ZeroMemory(&d3d, sizeof(d3d));

	HRESULT hr = S_OK;
	qbool fullInit = qfalse;
	DXGI_SWAP_CHAIN_DESC swapChainDesc;
	if(d3ds.library == NULL)
	{
		fullInit = qtrue;

		d3ds.library = LoadLibraryA("D3D11.dll");
		if(d3ds.library == NULL)
			ri.Error(ERR_FATAL, "D3D11.dll couldn't be found or opened");

		PFN_D3D11_CREATE_DEVICE_AND_SWAP_CHAIN pD3D11CreateDeviceAndSwapChain =
			(PFN_D3D11_CREATE_DEVICE_AND_SWAP_CHAIN)GetProcAddress(d3ds.library, "D3D11CreateDeviceAndSwapChain");
		if(pD3D11CreateDeviceAndSwapChain == NULL)
			ri.Error(ERR_FATAL, "Failed to locate D3D11CreateDeviceAndSwapChain in D3D11.dll");

		const D3D_FEATURE_LEVEL featureLevels[2] = { D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1 };
		UINT flags = D3D11_CREATE_DEVICE_SINGLETHREADED;
#if defined(_DEBUG)
		flags |= D3D11_CREATE_DEVICE_DEBUG;
#endif

		d3ds.flipAndTear = CheckFlipAndTearSupport();

		ZeroMemory(&swapChainDesc, sizeof(swapChainDesc));
		swapChainDesc.BufferDesc.Width = glInfo.winWidth;
		swapChainDesc.BufferDesc.Height = glInfo.winHeight;
		swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
		swapChainDesc.BufferDesc.RefreshRate.Numerator = 0;
		swapChainDesc.BufferDesc.RefreshRate.Denominator = 1;
		swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
		swapChainDesc.OutputWindow = GetActiveWindow();
		swapChainDesc.SampleDesc.Count = 1;
		swapChainDesc.SampleDesc.Quality = 0;
		swapChainDesc.Windowed = TRUE;
		swapChainDesc.BufferDesc.ScanlineOrdering = DXGI_MODE_SCANLINE_ORDER_UNSPECIFIED;
		swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_UNSPECIFIED;
		if(d3ds.flipAndTear)
		{
			// flip and tear, until it is done
			swapChainDesc.BufferCount = 2;
			swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
			swapChainDesc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
		}
		else
		{
			swapChainDesc.BufferCount = 1;
			swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
			swapChainDesc.Flags = 0;
		}

	create_device:
		hr = (*pD3D11CreateDeviceAndSwapChain)(
			NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, featureLevels, ARRAY_LEN(featureLevels), D3D11_SDK_VERSION,
			&swapChainDesc, &d3ds.swapChain, &d3ds.device, NULL, &d3ds.context);
		if(hr == DXGI_ERROR_SDK_COMPONENT_MISSING)
		{
			// the debug layer isn't available: drop the debug flag and try again
			ri.Printf(PRINT_WARNING, "D3D11CreateDeviceAndSwapChain failed because you don't have the SDK installed.\n");
			ri.Printf(PRINT_WARNING, "Trying to create the device again without the debug layer...\n");
			flags &= ~D3D11_CREATE_DEVICE_DEBUG;
			goto create_device;
		}
		Check(hr, "D3D11CreateDeviceAndSwapChain");
	}
	else
	{
		// the device and swap chain already exist: only the description is needed below
		hr = d3ds.swapChain->GetDesc(&swapChainDesc);
		Check(hr, "IDXGISwapChain::GetDesc");
	}

	d3d.formatColorRT = GetRenderTargetColorFormat(r_rtColorFormat->integer);
	d3d.formatDepth = DXGI_FORMAT_R24G8_TYPELESS;
	d3d.formatDepthRTV = DXGI_FORMAT_R24_UNORM_X8_TYPELESS;
	d3d.formatDepthView = DXGI_FORMAT_D24_UNORM_S8_UINT;

	//
	// screenshot / video capture textures (optional: failures only print warnings)
	//

	D3D11_TEXTURE2D_DESC readbackTexDesc;
	ZeroMemory(&readbackTexDesc, sizeof(readbackTexDesc));
	readbackTexDesc.Width = glConfig.vidWidth;
	readbackTexDesc.Height = glConfig.vidHeight;
	readbackTexDesc.MipLevels = 1;
	readbackTexDesc.ArraySize = 1;
	readbackTexDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
	readbackTexDesc.SampleDesc.Count = 1;
	readbackTexDesc.SampleDesc.Quality = 0;
	readbackTexDesc.Usage = D3D11_USAGE_STAGING;
	readbackTexDesc.BindFlags = 0;
	readbackTexDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
	readbackTexDesc.MiscFlags = 0;
	d3d.errorMode = EM_SILENT;
	if(!D3D11_CreateTexture2D(&readbackTexDesc, 0, &d3d.readbackTexture, "readback texture"))
		ri.Printf(PRINT_WARNING, "Screengrab texture creation failed! /" S_COLOR_CMD "screenshot^7* and /" S_COLOR_CMD "video^7 won't work\n");
	d3d.errorMode = EM_FATAL;

	if(d3d.readbackTexture != NULL && r_mode->integer == VIDEOMODE_UPSCALE)
	{
		d3d.errorMode = EM_SILENT;

		D3D11_TEXTURE2D_DESC screenshotTexDesc;
		ZeroMemory(&screenshotTexDesc, sizeof(screenshotTexDesc));
		screenshotTexDesc.Width = glConfig.vidWidth;
		screenshotTexDesc.Height = glConfig.vidHeight;
		screenshotTexDesc.MipLevels = 1;
		screenshotTexDesc.ArraySize = 1;
		screenshotTexDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
		screenshotTexDesc.SampleDesc.Count = 1;
		screenshotTexDesc.SampleDesc.Quality = 0;
		screenshotTexDesc.Usage = D3D11_USAGE_DEFAULT;
		screenshotTexDesc.BindFlags = D3D11_BIND_RENDER_TARGET;
		screenshotTexDesc.CPUAccessFlags = 0;
		screenshotTexDesc.MiscFlags = 0;
		if(!D3D11_CreateTexture2D(&screenshotTexDesc, 0, &d3d.screenshotTexture, "screenshot texture"))
			ri.Printf(PRINT_WARNING, "Screenshot texture creation failed! /" S_COLOR_CMD "screenshot^7* and /" S_COLOR_CMD "video^7 may not work\n");

		D3D11_RENDER_TARGET_VIEW_DESC screenshotRTVDesc;
		ZeroMemory(&screenshotRTVDesc, sizeof(screenshotRTVDesc));
		screenshotRTVDesc.Format = swapChainDesc.BufferDesc.Format;
		screenshotRTVDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
		screenshotRTVDesc.Texture2D.MipSlice = 0;
		if(!D3D11_CreateRenderTargetView(d3d.screenshotTexture, &screenshotRTVDesc, &d3d.screenshotTextureRTView, "screenshot texture render target view"))
			ri.Printf(PRINT_WARNING, "Screenshot texture RTV creation failed! /" S_COLOR_CMD "screenshot^7* and /" S_COLOR_CMD "video^7 may not work\n");

		d3d.errorMode = EM_FATAL;
	}

	//
	// back buffer, render targets and depth buffer
	//

	hr = d3ds.swapChain->GetBuffer(0, IID_ID3D11Texture2D, (void**)&d3d.backBufferTexture);
	CheckAndName(hr, "GetBuffer", d3d.backBufferTexture, "back buffer texture");

	D3D11_RENDER_TARGET_VIEW_DESC colorViewDesc; // needed?
	ZeroMemory(&colorViewDesc, sizeof(colorViewDesc));
	colorViewDesc.Format = swapChainDesc.BufferDesc.Format;
	colorViewDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
	colorViewDesc.Texture2D.MipSlice = 0;
	D3D11_CreateRenderTargetView(d3d.backBufferTexture, &colorViewDesc, &d3d.backBufferRTView, "back buffer render target view");

	DXGI_SAMPLE_DESC sampleDesc;
	FindBestAvailableAA(&sampleDesc);
	// alpha-to-coverage is only enabled when multi-sampling is actually active
	const qbool alphaToCoverageOK = sampleDesc.Count > 1 && r_alphaToCoverage->integer != 0;

	D3D11_TEXTURE2D_DESC renderTargetTexDesc;
	ZeroMemory(&renderTargetTexDesc, sizeof(renderTargetTexDesc));
	renderTargetTexDesc.Width = glConfig.vidWidth;
	renderTargetTexDesc.Height = glConfig.vidHeight;
	renderTargetTexDesc.MipLevels = 1;
	renderTargetTexDesc.ArraySize = 1;
	renderTargetTexDesc.Format = d3d.formatColorRT;
	renderTargetTexDesc.SampleDesc.Count = sampleDesc.Count;
	renderTargetTexDesc.SampleDesc.Quality = sampleDesc.Quality;
	renderTargetTexDesc.Usage = D3D11_USAGE_DEFAULT;
	renderTargetTexDesc.BindFlags = D3D11_BIND_RENDER_TARGET;
	renderTargetTexDesc.CPUAccessFlags = 0;
	renderTargetTexDesc.MiscFlags = 0;
	D3D11_CreateTexture2D(&renderTargetTexDesc, 0, &d3d.renderTargetTextureMS, "MS render target texture");

	D3D11_RENDER_TARGET_VIEW_DESC rtvDesc;
	ZeroMemory(&rtvDesc, sizeof(rtvDesc));
	rtvDesc.Format = renderTargetTexDesc.Format;
	rtvDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2DMS;
	D3D11_CreateRenderTargetView(d3d.renderTargetTextureMS, &rtvDesc, &d3d.renderTargetViewMS, "MS render target view");

	// single-sample texture with the same format as the MS render target, readable by shaders
	ZeroMemory(&renderTargetTexDesc, sizeof(renderTargetTexDesc));
	renderTargetTexDesc.Width = glConfig.vidWidth;
	renderTargetTexDesc.Height = glConfig.vidHeight;
	renderTargetTexDesc.MipLevels = 1;
	renderTargetTexDesc.ArraySize = 1;
	renderTargetTexDesc.Format = d3d.formatColorRT;
	renderTargetTexDesc.SampleDesc.Count = 1;
	renderTargetTexDesc.SampleDesc.Quality = 0;
	renderTargetTexDesc.Usage = D3D11_USAGE_DEFAULT;
	renderTargetTexDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
	renderTargetTexDesc.CPUAccessFlags = 0;
	renderTargetTexDesc.MiscFlags = 0;
	D3D11_CreateTexture2D(&renderTargetTexDesc, 0, &d3d.resolveTexture, "resolve texture");

	D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
	ZeroMemory(&srvDesc, sizeof(srvDesc));
	srvDesc.Format = renderTargetTexDesc.Format;
	srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
	srvDesc.Texture2D.MipLevels = 1;
	srvDesc.Texture2D.MostDetailedMip = 0;
	D3D11_CreateShaderResourceView(d3d.resolveTexture, &srvDesc, &d3d.resolveTextureShaderView, "resolve texture shader resource view");

	// the depth texture is typeless so that it can be bound both as a depth/stencil target and as a shader resource
	D3D11_TEXTURE2D_DESC depthStencilTexDesc;
	ZeroMemory(&depthStencilTexDesc, sizeof(depthStencilTexDesc));
	depthStencilTexDesc.Width = glConfig.vidWidth;
	depthStencilTexDesc.Height = glConfig.vidHeight;
	depthStencilTexDesc.MipLevels = 1;
	depthStencilTexDesc.ArraySize = 1;
	depthStencilTexDesc.Format = d3d.formatDepth;
	depthStencilTexDesc.SampleDesc.Count = sampleDesc.Count;
	depthStencilTexDesc.SampleDesc.Quality = sampleDesc.Quality;
	depthStencilTexDesc.Usage = D3D11_USAGE_DEFAULT;
	depthStencilTexDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE;
	depthStencilTexDesc.CPUAccessFlags = 0;
	depthStencilTexDesc.MiscFlags = 0;
	D3D11_CreateTexture2D(&depthStencilTexDesc, 0, &d3d.depthStencilTexture, "depth stencil texture");

	D3D11_DEPTH_STENCIL_VIEW_DESC depthStencilViewDesc;
	ZeroMemory(&depthStencilViewDesc, sizeof(depthStencilViewDesc));
	depthStencilViewDesc.Format = d3d.formatDepthView;
	depthStencilViewDesc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2DMS;
	depthStencilViewDesc.Texture2D.MipSlice = 0;
	hr = d3ds.device->CreateDepthStencilView(d3d.depthStencilTexture, &depthStencilViewDesc, &d3d.depthStencilView);
	CheckAndName(hr, "CreateDepthStencilView", d3d.depthStencilView, "depth stencil view");

	ZeroMemory(&srvDesc, sizeof(srvDesc));
	srvDesc.Format = d3d.formatDepthRTV;
	srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DMS;
	D3D11_CreateShaderResourceView(d3d.depthStencilTexture, &srvDesc, &d3d.depthStencilShaderView, "depth stencil shader resource view");

	//
	// generic pipeline
	//

	// pixel shader variant index: bit 0 = alpha-to-coverage, bit 1 = dithering
	const ShaderDesc* const genericPS = &genericPixelShaders[(alphaToCoverageOK != 0) + 2 * (r_dither->integer != 0)];
	D3D11_CreateVertexShader(g_generic_vs, ARRAY_LEN(g_generic_vs), NULL, &d3d.pipelines[PID_GENERIC].vertexShader, "generic vertex shader");
	D3D11_CreatePixelShader(genericPS->code, genericPS->size, NULL, &d3d.pipelines[PID_GENERIC].pixelShader, genericPS->name);

	D3D11_INPUT_ELEMENT_DESC genericInputLayoutDesc[] =
	{
		{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 1, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 2, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 3, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }
	};
	D3D11_CreateInputLayout(genericInputLayoutDesc, ARRAY_LEN(genericInputLayoutDesc), g_generic_vs, ARRAY_LEN(g_generic_vs), &d3d.pipelines[PID_GENERIC].inputLayout, "generic input layout");

	d3ds.context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

	//
	// dynamic geometry buffers (one vertex buffer per attribute + one index buffer)
	//

	const int maxVertexCount = SHADER_MAX_VERTEXES;
	const int maxIndexCount = SHADER_MAX_INDEXES;

	VertexBuffer* const vb = d3d.vertexBuffers;
	vb[VB_POSITION].itemSize = sizeof(vec4_t);
	vb[VB_NORMAL].itemSize = sizeof(vec4_t);
	vb[VB_TEXCOORD].itemSize = sizeof(vec2_t);
	vb[VB_TEXCOORD2].itemSize = sizeof(vec2_t);
	vb[VB_COLOR].itemSize = sizeof(color4ub_t);
	d3d.indexBuffer.itemSize = sizeof(uint32_t);
	for(int i = 0; i < ARRAY_LEN(d3d.vertexBuffers); ++i)
	{
		vb[i].capacity = maxVertexCount;
		vb[i].discard = qtrue;
	}
	d3d.indexBuffer.capacity = maxIndexCount;
	d3d.indexBuffer.discard = qtrue;

	// positions and normals share the same item size, hence the same description
	D3D11_BUFFER_DESC vertexBufferDesc;
	ZeroMemory(&vertexBufferDesc, sizeof(vertexBufferDesc));
	vertexBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	vertexBufferDesc.ByteWidth = maxVertexCount * vb[VB_POSITION].itemSize;
	vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
	vertexBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&vertexBufferDesc, NULL, &vb[VB_POSITION].buffer, "position vertex buffer");
	D3D11_CreateBuffer(&vertexBufferDesc, NULL, &vb[VB_NORMAL].buffer, "normal vertex buffer");

	D3D11_BUFFER_DESC colorBufferDesc;
	ZeroMemory(&colorBufferDesc, sizeof(colorBufferDesc));
	colorBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	colorBufferDesc.ByteWidth = maxVertexCount * vb[VB_COLOR].itemSize;
	colorBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
	colorBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&colorBufferDesc, NULL, &vb[VB_COLOR].buffer, "color vertex buffer");

	// both texture coordinate sets share the same description
	D3D11_BUFFER_DESC texCoordBufferDesc;
	ZeroMemory(&texCoordBufferDesc, sizeof(texCoordBufferDesc));
	texCoordBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	texCoordBufferDesc.ByteWidth = maxVertexCount * vb[VB_TEXCOORD].itemSize;
	texCoordBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
	texCoordBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&texCoordBufferDesc, NULL, &vb[VB_TEXCOORD].buffer, "texture coordinates vertex buffer #1");
	D3D11_CreateBuffer(&texCoordBufferDesc, NULL, &vb[VB_TEXCOORD2].buffer, "texture coordinates vertex buffer #2");

	D3D11_BUFFER_DESC indexBufferDesc;
	ZeroMemory(&indexBufferDesc, sizeof(indexBufferDesc));
	indexBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	indexBufferDesc.ByteWidth = maxIndexCount * d3d.indexBuffer.itemSize;
	indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
	indexBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&indexBufferDesc, NULL, &d3d.indexBuffer.buffer, "index buffer");

	d3ds.context->IASetIndexBuffer(d3d.indexBuffer.buffer, DXGI_FORMAT_R32_UINT, 0);

	D3D11_BUFFER_DESC vertexShaderBufferDesc;
	ZeroMemory(&vertexShaderBufferDesc, sizeof(vertexShaderBufferDesc));
	vertexShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	vertexShaderBufferDesc.ByteWidth = sizeof(GenericVSData);
	vertexShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	vertexShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&vertexShaderBufferDesc, NULL, &d3d.pipelines[PID_GENERIC].vertexBuffer, "generic vertex shader buffer");

	D3D11_BUFFER_DESC pixelShaderBufferDesc;
	ZeroMemory(&pixelShaderBufferDesc, sizeof(pixelShaderBufferDesc));
	pixelShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	pixelShaderBufferDesc.ByteWidth = sizeof(GenericPSData);
	pixelShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	pixelShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&pixelShaderBufferDesc, NULL, &d3d.pipelines[PID_GENERIC].pixelBuffer, "generic pixel shader buffer");

	// create all sampler states
	for(int textureMode = 0; textureMode < TM_COUNT; ++textureMode)
	{
		for(int wrapMode = 0; wrapMode < TW_COUNT; ++wrapMode)
		{
			const int index = ComputeSamplerStateIndex(wrapMode, textureMode);

			// @NOTE: D3D10_REQ_MAXANISOTROPY == D3D11_REQ_MAXANISOTROPY
			const int maxAnisotropy = Com_ClampInt(1, D3D11_REQ_MAXANISOTROPY, r_ext_max_anisotropy->integer);
			const D3D11_TEXTURE_ADDRESS_MODE mode = GetTextureAddressMode((textureWrap_t)wrapMode);
			ID3D11SamplerState* samplerState;
			D3D11_SAMPLER_DESC samplerDesc;
			ZeroMemory(&samplerDesc, sizeof(samplerDesc));
			// anisotropic mode falls back to tri-linear filtering when the max. anisotropy is 1
			samplerDesc.Filter = textureMode == TM_NEAREST ?
				D3D11_FILTER_MIN_MAG_MIP_POINT :
				((textureMode == TM_BILINEAR || maxAnisotropy == 1) ? D3D11_FILTER_MIN_MAG_MIP_LINEAR : D3D11_FILTER_ANISOTROPIC);
			samplerDesc.AddressU = mode;
			samplerDesc.AddressV = mode;
			samplerDesc.AddressW = mode;
			samplerDesc.ComparisonFunc = D3D11_COMPARISON_NEVER;
			samplerDesc.MinLOD = -D3D11_FLOAT32_MAX;
			samplerDesc.MaxLOD = D3D11_FLOAT32_MAX;
			samplerDesc.MaxAnisotropy = textureMode == TM_ANISOTROPIC ? maxAnisotropy : 1;
			hr = d3ds.device->CreateSamplerState(&samplerDesc, &samplerState);
			CheckAndName(hr, "CreateSamplerState", samplerState, va("sampler state %03d", index));

			d3d.samplerStates[index] = samplerState;
		}
	}

	// force set the default sampler states
	for(int i = 0; i < ARRAY_LEN(d3d.samplerStateIndices); ++i)
	{
		d3d.samplerStateIndices[i] = -1;
		ApplySamplerState(i, TW_CLAMP_TO_EDGE, TM_BILINEAR);
	}

	// create all blend states
	// the alpha-to-coverage variants are only created when alpha-to-coverage can be used
	const int coverageModes = alphaToCoverageOK ? 2 : 1;
	for(int a = 0; a < coverageModes; ++a)
	{
		for(int s = 1; s < BLEND_STATE_COUNT; ++s)
		{
			for(int d = 1; d < BLEND_STATE_COUNT; ++d)
			{
				const int index = ComputeBlendStateIndex(s, d, a);

				ID3D11BlendState* blendState;
				D3D11_BLEND_DESC blendDesc;
				ZeroMemory(&blendDesc, sizeof(blendDesc));
				blendDesc.AlphaToCoverageEnable = a == 1 ? TRUE : FALSE;
				blendDesc.RenderTarget[0].BlendEnable = TRUE;
				blendDesc.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD;
				blendDesc.RenderTarget[0].SrcBlend = (D3D11_BLEND)s;
				blendDesc.RenderTarget[0].DestBlend = (D3D11_BLEND)d;
				blendDesc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
				blendDesc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
				blendDesc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
				blendDesc.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL;
				hr = d3ds.device->CreateBlendState(&blendDesc, &blendState);
				CheckAndName(hr, "CreateBlendState", blendState, va("blend state %03d", index));

				d3d.blendStates[index] = blendState;
			}
		}
	}

	// create all the depth/stencil states
	for(int disableDepth = 0; disableDepth < 2; ++disableDepth)
	{
		for(int depthFunc = 0; depthFunc < DF_COUNT; ++depthFunc)
		{
			for(int maskTrue = 0; maskTrue < 2; ++maskTrue)
			{
				const int index = ComputeDepthStencilStateIndex(disableDepth, depthFunc, maskTrue);

				ID3D11DepthStencilState* depthState;
				D3D11_DEPTH_STENCIL_DESC depthDesc;
				ZeroMemory(&depthDesc, sizeof(depthDesc));
				depthDesc.DepthEnable = disableDepth ? FALSE : TRUE;
				depthDesc.DepthFunc = GetDepthComparison((DepthFunc)depthFunc);
				depthDesc.DepthWriteMask = maskTrue ? D3D11_DEPTH_WRITE_MASK_ALL : D3D11_DEPTH_WRITE_MASK_ZERO;
				depthDesc.StencilEnable = FALSE;
				hr = d3ds.device->CreateDepthStencilState(&depthDesc, &depthState);
				CheckAndName(hr, "CreateDepthStencilState", depthState, va("depth/stencil state %03d", index));

				d3d.depthStencilStates[index] = depthState;
			}
		}
	}

	// create all the raster states
	for(int polygonOffset = 0; polygonOffset < 2; ++polygonOffset)
	{
		for(int wireFrame = 0; wireFrame < 2; ++wireFrame)
		{
			for(int cullType = 0; cullType < CT_COUNT; ++cullType)
			{
				const int index = ComputeRasterizerStateIndex(wireFrame, cullType, polygonOffset);

				ID3D11RasterizerState* rasterState;
				D3D11_RASTERIZER_DESC rasterDesc;
				ZeroMemory(&rasterDesc, sizeof(rasterDesc));
				rasterDesc.FillMode = wireFrame ? D3D11_FILL_WIREFRAME : D3D11_FILL_SOLID;
				rasterDesc.CullMode = GetCullMode((cullType_t)cullType);
				rasterDesc.FrontCounterClockwise = TRUE;
				rasterDesc.ScissorEnable = TRUE;
				rasterDesc.DepthClipEnable = TRUE;
				rasterDesc.DepthBiasClamp = 0.0f;
				rasterDesc.DepthBias = polygonOffset ? -1 : 0;
				rasterDesc.SlopeScaledDepthBias = polygonOffset ? -1.0f : 0.0f;
				hr = d3ds.device->CreateRasterizerState(&rasterDesc, &rasterState);
				CheckAndName(hr, "CreateRasterizerState", rasterState, va("raster state %03d", index));

				d3d.rasterStates[index] = rasterState;
			}
		}
	}

	//
	// post-processing
	//

	D3D11_CreateVertexShader(g_post_vs, ARRAY_LEN(g_post_vs), NULL, &d3d.pipelines[PID_POST_PROCESS].vertexShader, "post-process vertex shader");
	D3D11_CreatePixelShader(g_post_ps, ARRAY_LEN(g_post_ps), NULL, &d3d.pipelines[PID_POST_PROCESS].pixelShader, "post-process pixel shader");

	ZeroMemory(&vertexShaderBufferDesc, sizeof(vertexShaderBufferDesc));
	vertexShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	vertexShaderBufferDesc.ByteWidth = sizeof(d3d.postVSData);
	vertexShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	vertexShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&vertexShaderBufferDesc, NULL, &d3d.pipelines[PID_POST_PROCESS].vertexBuffer, "post-process vertex shader buffer");

	ZeroMemory(&pixelShaderBufferDesc, sizeof(pixelShaderBufferDesc));
	pixelShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	pixelShaderBufferDesc.ByteWidth = sizeof(d3d.postPSData);
	pixelShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	pixelShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&pixelShaderBufferDesc, NULL, &d3d.pipelines[PID_POST_PROCESS].pixelBuffer, "post-process pixel shader buffer");

	//
	// partial render target clears
	//

	D3D11_CreateVertexShader(g_clear_vs, ARRAY_LEN(g_clear_vs), NULL, &d3d.pipelines[PID_CLEAR].vertexShader, "clear vertex shader");
	D3D11_CreatePixelShader(g_clear_ps, ARRAY_LEN(g_clear_ps), NULL, &d3d.pipelines[PID_CLEAR].pixelShader, "clear pixel shader");

	ZeroMemory(&pixelShaderBufferDesc, sizeof(pixelShaderBufferDesc));
	pixelShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	pixelShaderBufferDesc.ByteWidth = sizeof(d3d.clearPSData);
	pixelShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	pixelShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&pixelShaderBufferDesc, NULL, &d3d.pipelines[PID_CLEAR].pixelBuffer, "clear pixel shader buffer");

	//
	// dynamic lights
	//

	D3D11_CreateVertexShader(g_dl_vs, ARRAY_LEN(g_dl_vs), NULL, &d3d.pipelines[PID_DYNAMIC_LIGHT].vertexShader, "dynamic light vertex shader");
	D3D11_CreatePixelShader(g_dl_ps, ARRAY_LEN(g_dl_ps), NULL, &d3d.pipelines[PID_DYNAMIC_LIGHT].pixelShader, "dynamic light pixel shader");

	D3D11_INPUT_ELEMENT_DESC dlInputLayoutDesc[] =
	{
		{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "NORMAL", 0, DXGI_FORMAT_R32G32B32_FLOAT, 1, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 2, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 3, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }
	};
	D3D11_CreateInputLayout(dlInputLayoutDesc, ARRAY_LEN(dlInputLayoutDesc), g_dl_vs, ARRAY_LEN(g_dl_vs), &d3d.pipelines[PID_DYNAMIC_LIGHT].inputLayout, "dynamic light input layout");

	ZeroMemory(&vertexShaderBufferDesc, sizeof(vertexShaderBufferDesc));
	vertexShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	vertexShaderBufferDesc.ByteWidth = sizeof(DynamicLightVSData);
	vertexShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	vertexShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&vertexShaderBufferDesc, NULL, &d3d.pipelines[PID_DYNAMIC_LIGHT].vertexBuffer, "dynamic light vertex shader buffer");

	ZeroMemory(&pixelShaderBufferDesc, sizeof(pixelShaderBufferDesc));
	pixelShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	pixelShaderBufferDesc.ByteWidth = sizeof(DynamicLightPSData);
	pixelShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	pixelShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&pixelShaderBufferDesc, NULL, &d3d.pipelines[PID_DYNAMIC_LIGHT].pixelBuffer, "dynamic light pixel shader buffer");

	//
	// soft sprites
	//

	D3D11_CreateVertexShader(g_sprite_vs, ARRAY_LEN(g_sprite_vs), NULL, &d3d.pipelines[PID_SOFT_SPRITE].vertexShader, "soft sprite vertex shader");
	D3D11_CreatePixelShader(g_sprite_ps, ARRAY_LEN(g_sprite_ps), NULL, &d3d.pipelines[PID_SOFT_SPRITE].pixelShader, "soft sprite pixel shader");

	D3D11_INPUT_ELEMENT_DESC ssInputLayoutDesc[] =
	{
		{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 1, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 2, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }
	};
	D3D11_CreateInputLayout(ssInputLayoutDesc, ARRAY_LEN(ssInputLayoutDesc), g_sprite_vs, ARRAY_LEN(g_sprite_vs), &d3d.pipelines[PID_SOFT_SPRITE].inputLayout, "soft sprite input layout");

	ZeroMemory(&vertexShaderBufferDesc, sizeof(vertexShaderBufferDesc));
	vertexShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	vertexShaderBufferDesc.ByteWidth = sizeof(DepthFadeVSData);
	vertexShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	vertexShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&vertexShaderBufferDesc, NULL, &d3d.pipelines[PID_SOFT_SPRITE].vertexBuffer, "soft sprite vertex shader buffer");

	ZeroMemory(&pixelShaderBufferDesc, sizeof(pixelShaderBufferDesc));
	pixelShaderBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	pixelShaderBufferDesc.ByteWidth = sizeof(DepthFadePSData);
	pixelShaderBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	pixelShaderBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	D3D11_CreateBuffer(&pixelShaderBufferDesc, NULL, &d3d.pipelines[PID_SOFT_SPRITE].pixelBuffer, "soft sprite pixel shader buffer");

	//
	// mip-map generation
	//

	// optional feature: only attempted on feature level 11.0 devices,
	// and any creation failure simply leaves mipGenOK cleared
	qbool mipGenOK = qfalse;
	if(r_gpuMipGen->integer && d3ds.device->GetFeatureLevel() == D3D_FEATURE_LEVEL_11_0)
	{
		d3d.errorMode = EM_SILENT;

		mipGenOK = qtrue;
		mipGenOK &= D3D11_CreateComputeShader(g_mip_pass_cs, ARRAY_LEN(g_mip_pass_cs), NULL, &d3d.mipDownSampleComputeShader, "mip-map down-sampling compute shader");
		mipGenOK &= D3D11_CreateComputeShader(g_mip_start_cs, ARRAY_LEN(g_mip_start_cs), NULL, &d3d.mipGammaToLinearComputeShader, "gamma-to-linear compute shader");
		mipGenOK &= D3D11_CreateComputeShader(g_mip_end_cs, ARRAY_LEN(g_mip_end_cs), NULL, &d3d.mipLinearToGammaComputeShader, "linear-to-gamma compute shader");

		D3D11_BUFFER_DESC bufferDesc;
		ZeroMemory(&bufferDesc, sizeof(bufferDesc));
		bufferDesc.Usage = D3D11_USAGE_DYNAMIC;
		bufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
		bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
		bufferDesc.ByteWidth = sizeof(Down4CSData);
		mipGenOK &= D3D11_CreateBuffer(&bufferDesc, NULL, &d3d.mipDownSampleConstBuffer, "mip-map down-sampling compute shader buffer");
		bufferDesc.ByteWidth = sizeof(LinearToGammaCSData);
		mipGenOK &= D3D11_CreateBuffer(&bufferDesc, NULL, &d3d.mipLinearToGammaConstBuffer, "mip-map linear-to-gamma compute shader buffer");
		bufferDesc.ByteWidth = sizeof(GammaToLinearCSData);
		mipGenOK &= D3D11_CreateBuffer(&bufferDesc, NULL, &d3d.mipGammaToLinearConstBuffer, "mip-map gamma-to-linear compute shader buffer");

		for(int i = 0; i < ARRAY_LEN(d3d.mipGenTextures); ++i)
		{
			D3D11_TEXTURE2D_DESC textureDesc;
			ZeroMemory(&textureDesc, sizeof(textureDesc));
			textureDesc.Width = MAX_GPU_TEXTURE_SIZE;
			textureDesc.Height = MAX_GPU_TEXTURE_SIZE;
			textureDesc.MipLevels = 1;
			textureDesc.ArraySize = 1;
			// the texture at index 2 holds 8-bit integer data, the others hold 16-bit float data
			textureDesc.Format = i == 2 ? DXGI_FORMAT_R8G8B8A8_UINT : DXGI_FORMAT_R16G16B16A16_FLOAT;
			textureDesc.SampleDesc.Count = 1;
			textureDesc.SampleDesc.Quality = 0;
			textureDesc.Usage = D3D11_USAGE_DEFAULT;
			textureDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
			textureDesc.CPUAccessFlags = 0;
			textureDesc.MiscFlags = 0;
			mipGenOK &= D3D11_CreateTexture2D(&textureDesc, 0, &d3d.mipGenTextures[i].texture, va("mip-map generation texture #%d", i + 1));

			ZeroMemory(&srvDesc, sizeof(srvDesc));
			srvDesc.Format = textureDesc.Format;
			srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
			srvDesc.Texture2D.MipLevels = 1;
			srvDesc.Texture2D.MostDetailedMip = 0;
			mipGenOK &= D3D11_CreateShaderResourceView(d3d.mipGenTextures[i].texture, &srvDesc, &d3d.mipGenTextures[i].srv, va("mip-map generation SRV #%d", i + 1));

			D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
			ZeroMemory(&uavDesc, sizeof(uavDesc));
			uavDesc.Format = textureDesc.Format;
			uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;
			uavDesc.Texture2D.MipSlice = 0;
			// named "UAV" so that it can be told apart from the SRV of the same texture
			mipGenOK &= D3D11_CreateUnorderedAccessView(d3d.mipGenTextures[i].texture, &uavDesc, &d3d.mipGenTextures[i].uav, va("mip-map generation UAV #%d", i + 1));
		}

		d3d.errorMode = EM_FATAL;
	}

	//
	// misc.
	//

	// select the generic pipeline to begin with
	d3d.pipelineIndex = (PipelineId)-1;
	ApplyPipeline(PID_GENERIC);

	// force set all the default non-sampler states
	d3d.blendStateIndex = -1;
	d3d.depthStencilStateIndex = -1;
	d3d.rasterStateIndex = -1;
	ApplyState(GLS_DEFAULT, CT_TWO_SIDED, qfalse);

	glConfig.colorBits = 32;
	glConfig.depthBits = 24;
	glConfig.stencilBits = 8;
	glConfig.unused_maxTextureSize = MAX_GPU_TEXTURE_SIZE;
	glConfig.unused_maxActiveTextures = 0;
	glConfig.unused_driverType = 0;		// ICD
	glConfig.unused_hardwareType = 0;	// generic
	glConfig.unused_deviceSupportsGamma = qtrue;
	glConfig.unused_textureCompression = 0;	// no compression
	glConfig.unused_textureEnvAddAvailable = qtrue;
	glConfig.unused_displayFrequency = 0;
	glConfig.unused_isFullscreen = !!r_fullscreen->integer;
	glConfig.unused_stereoEnabled = qfalse;
	glConfig.unused_smpActive = qfalse;
	glConfig.extensions_string[0] = '\0';
	glConfig.renderer_string[0] = '\0';
	glConfig.vendor_string[0] = '\0';
	glConfig.version_string[0] = '\0';
	glInfo.displayFrequency = 0;
	glInfo.maxAnisotropy = D3D11_REQ_MAXANISOTROPY;	// @NOTE: D3D10_REQ_MAXANISOTROPY == D3D11_REQ_MAXANISOTROPY
	glInfo.maxTextureSize = MAX_GPU_TEXTURE_SIZE;
	glInfo.depthFadeSupport = r_depthFade->integer == 1;
	glInfo.mipGenSupport = mipGenOK;
	glInfo.alphaToCoverageSupport = alphaToCoverageOK;

	// the adapter can't change without re-creating the device,
	// so its description is only queried on full init
	if(fullInit)
	{
		d3ds.adapterInfo.valid = qfalse;

		IDXGIDevice* dxgiDevice;
		if(SUCCEEDED(d3ds.device->QueryInterface(__uuidof(IDXGIDevice), (void**)&dxgiDevice)))
		{
			IDXGIAdapter* dxgiAdapter;
			if(SUCCEEDED(dxgiDevice->GetAdapter(&dxgiAdapter)))
			{
				DXGI_ADAPTER_DESC desc;
				if(SUCCEEDED(dxgiAdapter->GetDesc(&desc)))
				{
					// NOTE(review): CP_UTF7 is an unusual choice of code page - confirm it is intended
					char name[ARRAY_LEN(desc.Description) + 1];
					if(WideCharToMultiByte(CP_UTF7, 0, desc.Description, -1, name, sizeof(name) - 1, NULL, NULL) > 0)
					{
						Q_strncpyz(glConfig.renderer_string, name, sizeof(glConfig.renderer_string));
					}

					d3ds.adapterInfo.valid = qtrue;
					d3ds.adapterInfo.dedicatedSystemMemoryMB = (int)(desc.DedicatedSystemMemory >> 20);
					d3ds.adapterInfo.dedicatedVideoMemoryMB = (int)(desc.DedicatedVideoMemory >> 20);
					d3ds.adapterInfo.sharedSystemMemoryMB = (int)(desc.SharedSystemMemory >> 20);
				}

				// GetAdapter hands out its own reference, which must be released here
				COM_RELEASE(dxgiAdapter);
			}

			COM_RELEASE(dxgiDevice);
		}
	}

	if(r_d3d11_syncOffsets->integer == D3D11SO_AUTO)
	{
#if 0
		// only nVidia's drivers seem to consistently handle the extra IASetVertexBuffers calls well enough
		d3d.splitBufferOffsets = Q_stristr(glConfig.renderer_string, "NVIDIA") != NULL;
#else
		// however, we'll just treat all drivers as equally dumb by default for now
		// (splitBufferOffsets is a boolean: synced offsets means no split offsets)
		d3d.splitBufferOffsets = qfalse;
#endif
	}
	else
	{
		d3d.splitBufferOffsets = r_d3d11_syncOffsets->integer == D3D11SO_SPLITOFFSETS;
	}

	ri.Printf(PRINT_ALL, "MSAA: %d samples requested, %d selected\n", r_msaa->integer, sampleDesc.Count);

	return qtrue;
}

// Releases all the Direct3D objects tracked in 'd3d' and clears that state.
// When fullShutDown is set, the immediate context, the device and the swap chain
// are released too, the library handle is freed and 'd3ds' is cleared as well.
// The renderer's image list is always emptied.
static void GAL_ShutDown(qbool fullShutDown)
{
	// image textures and their views
	for(int t = 0; t < d3d.textureCount; ++t)
	{
		COM_RELEASE(d3d.textures[t].view);
		COM_RELEASE(d3d.textures[t].texture);
	}

	// per-pipeline shaders, input layouts and constant buffers
	for(int p = 0; p < ARRAY_LEN(d3d.pipelines); ++p)
	{
		COM_RELEASE(d3d.pipelines[p].inputLayout);
		COM_RELEASE(d3d.pipelines[p].vertexShader);
		COM_RELEASE(d3d.pipelines[p].pixelShader);
		COM_RELEASE(d3d.pipelines[p].vertexBuffer);
		COM_RELEASE(d3d.pipelines[p].pixelBuffer);
	}

	// mip-map generation scratch textures
	for(int m = 0; m < ARRAY_LEN(d3d.mipGenTextures); ++m)
	{
		COM_RELEASE(d3d.mipGenTextures[m].texture);
		COM_RELEASE(d3d.mipGenTextures[m].srv);
		COM_RELEASE(d3d.mipGenTextures[m].uav);
	}

	// geometry buffers
	for(int v = 0; v < ARRAY_LEN(d3d.vertexBuffers); ++v)
	{
		COM_RELEASE(d3d.vertexBuffers[v].buffer);
	}
	COM_RELEASE(d3d.indexBuffer.buffer);

	// fixed-function state objects
	COM_RELEASE_ARRAY(d3d.samplerStates);
	COM_RELEASE_ARRAY(d3d.blendStates);
	COM_RELEASE_ARRAY(d3d.depthStencilStates);
	COM_RELEASE_ARRAY(d3d.rasterStates);

	// render targets, read-back resources and mip-map generation objects
	COM_RELEASE(d3d.backBufferTexture);
	COM_RELEASE(d3d.backBufferRTView);
	COM_RELEASE(d3d.renderTargetTextureMS);
	COM_RELEASE(d3d.renderTargetViewMS);
	COM_RELEASE(d3d.resolveTexture);
	COM_RELEASE(d3d.resolveTextureShaderView);
	COM_RELEASE(d3d.depthStencilTexture);
	COM_RELEASE(d3d.depthStencilView);
	COM_RELEASE(d3d.depthStencilShaderView);
	COM_RELEASE(d3d.readbackTexture);
	COM_RELEASE(d3d.screenshotTexture);
	COM_RELEASE(d3d.screenshotTextureRTView);
	COM_RELEASE(d3d.mipGammaToLinearComputeShader);
	COM_RELEASE(d3d.mipLinearToGammaComputeShader);
	COM_RELEASE(d3d.mipDownSampleComputeShader);
	COM_RELEASE(d3d.mipDownSampleConstBuffer);
	COM_RELEASE(d3d.mipLinearToGammaConstBuffer);
	COM_RELEASE(d3d.mipGammaToLinearConstBuffer);

	// GPU timing queries
	for(int q = 0; q < ARRAY_LEN(d3d.frameQueries); ++q)
	{
		COM_RELEASE(d3d.frameQueries[q].disjoint);
		COM_RELEASE(d3d.frameQueries[q].frameStart);
		COM_RELEASE(d3d.frameQueries[q].frameEnd);
	}

	if(fullShutDown)
	{
#if defined(_DEBUG)
		// DXGIGetDebugInterface would be nicer but it requires Windows 8...
		// It doesn't reference the device, so the device doesn't show up as a false positive.
		// The debug interface is grabbed now because the device gets released right below.
		ID3D11Debug* debugLayer = NULL;
		const HRESULT debugLayerResult = d3ds.device->QueryInterface(IID_PPV_ARGS(&debugLayer));
#endif

		d3ds.context->Release();
		d3ds.device->Release();
		d3ds.swapChain->Release();

#if defined(_DEBUG)
		// report whatever is still alive to the debugger's output window
		const char* const separator = "================================================================\n";
		OutputDebugStringA(separator);
		if(FAILED(debugLayerResult))
		{
			OutputDebugStringA("ID3D11Device::QueryInterface of ID3D11Debug failed!\n");
			OutputDebugStringA(va("%s\n", GetSystemErrorString(debugLayerResult)));
		}
		else
		{
			OutputDebugStringA("Summary\n");
			debugLayer->ReportLiveDeviceObjects(D3D11_RLDO_SUMMARY);
			OutputDebugStringA(separator);
			OutputDebugStringA("Details\n");
			debugLayer->ReportLiveDeviceObjects(D3D11_RLDO_DETAIL);
			debugLayer->Release();
		}
		OutputDebugStringA(separator);
#endif

		if(d3ds.library != NULL)
		{
			FreeLibrary(d3ds.library);
		}

		memset(&d3ds, 0, sizeof(d3ds));
	}

	memset(&d3d, 0, sizeof(d3d));

	// the renderer's images no longer have any backing texture
	tr.numImages = 0;
	memset(tr.images, 0, sizeof(tr.images));
}

static void BeginQueries()
{
	FrameQueries* const queries = &d3d.frameQueries[d3d.frameQueriesWriteIndex];
	queries->valid = qfalse;
	COM_RELEASE(queries->disjoint);
	COM_RELEASE(queries->frameStart);
	COM_RELEASE(queries->frameEnd);

	D3D11_QUERY_DESC qd;
	qd.MiscFlags = 0;
	qd.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
	d3ds.device->CreateQuery(&qd, &queries->disjoint);
	qd.Query = D3D11_QUERY_TIMESTAMP;
	d3ds.device->CreateQuery(&qd, &queries->frameStart);
	d3ds.device->CreateQuery(&qd, &queries->frameEnd);
	if(queries->disjoint != NULL &&
	   queries->frameStart != NULL &&
	   queries->frameEnd != NULL)
	{
		queries->valid = qtrue;
		d3ds.context->Begin(queries->disjoint);
		d3ds.context->End(queries->frameStart);
	}
	else
	{
		COM_RELEASE(queries->disjoint);
		COM_RELEASE(queries->frameStart);
		COM_RELEASE(queries->frameEnd);
	}
}

static void EndQueries()
{
	// finish this frame
	FrameQueries* queries = &d3d.frameQueries[d3d.frameQueriesWriteIndex];
	if(queries->valid)
	{
		d3ds.context->End(queries->frameEnd);
		d3ds.context->End(queries->disjoint);
		d3d.frameQueriesWriteIndex = (d3d.frameQueriesWriteIndex + 1) % ARRAY_LEN(d3d.frameQueries);
	}

	// try to grab a previous frame's results
	D3D10_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { 0 };
	backEnd.pc3D[RB_USEC_GPU] = 0; // pessimism...
	queries = &d3d.frameQueries[d3d.frameQueriesReadIndex];
	if(queries->valid &&
	   d3ds.context->GetData(queries->disjoint, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK)
	{
		UINT64 start = 0;
		UINT64 end = 0;
		if(!disjoint.Disjoint &&
		   disjoint.Frequency > 0 &&
		   d3ds.context->GetData(queries->frameStart, &start, sizeof(UINT64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK &&
		   d3ds.context->GetData(queries->frameEnd, &end, sizeof(UINT64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK)
		{
			backEnd.pc3D[RB_USEC_GPU] = int(((end - start) * UINT64(1000000)) / disjoint.Frequency);
		}
		d3d.frameQueriesReadIndex = (d3d.frameQueriesReadIndex + 1) % ARRAY_LEN(d3d.frameQueries);
	}
}

static void GAL_BeginFrame()
{
	BeginQueries();

	d3d.frameSeed[0] = (float)rand() / (float)RAND_MAX;
	d3d.frameSeed[1] = (float)rand() / (float)RAND_MAX;

	const FLOAT clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
	const FLOAT clearColorDebug[4] = { 1.0f, 0.0f, 0.5f, 1.0f };
	d3ds.context->ClearRenderTargetView(d3d.renderTargetViewMS, r_clear->integer ? clearColorDebug : clearColor);
	d3ds.context->ClearDepthStencilView(d3d.depthStencilView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
	ApplyPipeline(PID_GENERIC);
	ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight, glConfig.vidHeight);
}

// Draws the post-process pass (gamma, brightness, greyscale) using the resolve texture as input.
// vsX, vsY: vertex shader scale factors
// srX, srY: scissor rectangle scale factors
// screenshot: selects the screenshot pipeline and the render area's dimensions
//             instead of the post-process pipeline and the window's dimensions
static void DrawPostProcess(float vsX, float vsY, float srX, float srY, qbool screenshot)
{
	// shader constants
	d3d.postVSData.scaleX = vsX;
	d3d.postVSData.scaleY = vsY;
	d3d.postPSData.gamma = 1.0f / r_gamma->value;
	d3d.postPSData.brightness = r_brightness->value;
	d3d.postPSData.greyscale = r_greyscale->value;

	ApplyPipeline(screenshot ? PID_SCREENSHOT : PID_POST_PROCESS);
	ApplyState(GLS_DEPTHTEST_DISABLE, CT_TWO_SIDED, qfalse);
	UploadPendingShaderData();

	// the white image is bound first, then slot 0 gets the resolve texture's view
	BindImage(0, tr.whiteImage);
	ApplyPixelShaderResource(0, d3d.resolveTextureShaderView);
	ApplySamplerState(0, TW_CLAMP_TO_EDGE, TM_BILINEAR);

	if(screenshot)
	{
		ApplyViewportAndScissor(0, 0, glConfig.vidWidth, glConfig.vidHeight, glConfig.vidHeight);
	}
	else if(vsX < 1.0f || vsY < 1.0f)
	{
		// the image doesn't cover the entire window:
		// keep the full viewport and restrict the output with the scissor rectangle
		const int left = (glInfo.winWidth - glInfo.winWidth * vsX) / 2.0f;
		const int top = (glInfo.winHeight - glInfo.winHeight * vsY) / 2.0f;
		ApplyViewport(0, 0, glInfo.winWidth, glInfo.winHeight, glInfo.winHeight);
		ApplyScissor(left, top, glConfig.vidWidth * srX, glConfig.vidHeight * srY, glInfo.winHeight);
	}
	else
	{
		ApplyViewportAndScissor(0, 0, glInfo.winWidth, glInfo.winHeight, glInfo.winHeight);
	}

	// a single triangle
	d3ds.context->Draw(3, 0);
}

// Ends the frame: resolves the multi-sampled render target, runs the
// post-process pass, closes the GPU timing queries and presents the swap chain.
// A removed device raises a fatal error, a reset device triggers a video restart.
static void GAL_EndFrame()
{
	float vsX = 1.0f; // vertex shader scale factors
	float vsY = 1.0f;
	float srX = 1.0f; // scissor rectangle scale factors
	float srY = 1.0f;
	if(r_fullscreen->integer == 1 && r_mode->integer == VIDEOMODE_UPSCALE)
	{
		if(r_blitMode->integer == BLITMODE_CENTERED)
		{
			// keep the image at its original size
			vsX = (float)glConfig.vidWidth / (float)glInfo.winWidth;
			vsY = (float)glConfig.vidHeight / (float)glInfo.winHeight;
		}
		else if(r_blitMode->integer == BLITMODE_ASPECT)
		{
			// scale uniformly until one axis fills the window
			const float ars = (float)glConfig.vidWidth / (float)glConfig.vidHeight; // source aspect ratio
			const float ard = (float)glInfo.winWidth / (float)glInfo.winHeight; // destination aspect ratio
			if(ard > ars)
			{
				// the window is wider than the image: fit the height
				vsX = ars / ard;
				vsY = 1.0f;
				srX = (float)glInfo.winHeight / (float)glConfig.vidHeight;
				srY = srX;
			}
			else
			{
				// the window is narrower than the image (or equal): fit the width
				vsX = 1.0f;
				vsY = ard / ars;
				srX = (float)glInfo.winWidth / (float)glConfig.vidWidth;
				srY = srX;
			}
		}

		if(vsX != 1.0f || vsY != 1.0f)
		{
			// the image won't cover the entire back buffer, so clear it first
			const FLOAT clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
			d3ds.context->ClearRenderTargetView(d3d.backBufferRTView, clearColor);
		}
	}

	d3ds.context->ResolveSubresource(d3d.resolveTexture, 0, d3d.renderTargetTextureMS, 0, d3d.formatColorRT);
	DrawPostProcess(vsX, vsY, srX, srY, qfalse);

	EndQueries();

	// tearing is only requested with the flip model and a swap interval of 0
	const UINT presentFlags = d3ds.flipAndTear && r_swapInterval->integer == 0 ? DXGI_PRESENT_ALLOW_TEARING : 0;
	const HRESULT hr = d3ds.swapChain->Present(abs(r_swapInterval->integer), presentFlags);

	enum PresentError
	{
		PE_NONE,
		PE_DEVICE_REMOVED,
		PE_DEVICE_RESET
	};
	PresentError presentError = PE_NONE;
	HRESULT deviceRemovedReason = S_OK;
	if(hr == DXGI_ERROR_DEVICE_REMOVED || hr == D3DDDIERR_DEVICEREMOVED)
	{
		// a "removed" device can really be a reset one, so ask for the reason
		deviceRemovedReason = d3ds.device->GetDeviceRemovedReason();
		if(deviceRemovedReason == DXGI_ERROR_DEVICE_RESET)
		{
			presentError = PE_DEVICE_RESET;
		}
		else
		{
			presentError = PE_DEVICE_REMOVED;
		}
	}
	else if(hr == DXGI_ERROR_DEVICE_RESET)
	{
		presentError = PE_DEVICE_RESET;
	}

	if(presentError == PE_DEVICE_REMOVED)
	{
		ri.Error(ERR_FATAL, "Direct3D device was removed! Reason: %s", GetDeviceRemovedReasonString(deviceRemovedReason));
	}
	else if(presentError == PE_DEVICE_RESET)
	{
		// the message is terminated with a new line so the next print starts on its own line
		ri.Printf(PRINT_ERROR, "Direct3D device was reset! Restarting the video system...\n");
		Cmd_ExecuteString("vid_restart;");
	}
}

static void GAL_BeginSkyAndClouds(double depth)
{
	const float clipPlane[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	memcpy(d3d.oldSkyClipPlane, d3d.clipPlane, sizeof(d3d.oldSkyClipPlane));
	memcpy(d3d.clipPlane, clipPlane, sizeof(d3d.clipPlane));

	UINT numVP = 1;
	d3ds.context->RSGetViewports(&numVP, &d3d.oldSkyViewport);
	d3d.oldSkyViewport.MinDepth = (FLOAT)depth;
	d3d.oldSkyViewport.MaxDepth = (FLOAT)depth;
	d3ds.context->RSSetViewports(1, &d3d.oldSkyViewport);
}

static void GAL_EndSkyAndClouds()
{
	d3d.oldSkyViewport.MinDepth = 0.0f;
	d3d.oldSkyViewport.MaxDepth = 1.0f;
	d3ds.context->RSSetViewports(1, &d3d.oldSkyViewport);

	memcpy(d3d.clipPlane, d3d.oldSkyClipPlane, sizeof(d3d.clipPlane));
}

// Fills the output image with the byte value 0x7F.
// Each row is w pixels padded to 'alignment', with 4 bytes per pixel for CS_RGBA
// and 3 for CS_BGR. Nothing is written for any other color space.
static void WriteInvalidImage(int w, int h, int alignment, colorSpace_t colorSpace, void* out)
{
	int bytesPerPixel;
	switch(colorSpace)
	{
		case CS_RGBA:
			bytesPerPixel = 4;
			break;

		case CS_BGR:
			bytesPerPixel = 3;
			break;

		default:
			return;
	}

	memset(out, 0x7F, PAD(w * bytesPerPixel, alignment) * h);
}

static void GAL_ReadPixels(int, int, int w, int h, int alignment, colorSpace_t colorSpace, void* out)
{
	if(d3d.readbackTexture == NULL)
	{
		WriteInvalidImage(w, h, alignment, colorSpace, out);
		return;
	}

	if(r_mode->integer != VIDEOMODE_UPSCALE)
	{
		// matching dimensions means we can copy the data directly from the back buffer
		d3ds.context->CopyResource(d3d.readbackTexture, d3d.backBufferTexture);
	}
	else
	{
		if(d3d.screenshotTexture == NULL || d3d.screenshotTextureRTView == NULL)
		{
			WriteInvalidImage(w, h, alignment, colorSpace, out);
			return;
		}

		// we render the post-process pass into an intermediate texture and
		// copy its content into the readback texture
		DrawPostProcess(1.0f, 1.0f, 1.0f, 1.0f, qtrue);
		d3ds.context->CopyResource(d3d.readbackTexture, d3d.screenshotTexture);
	}

	D3D11_MAPPED_SUBRESOURCE ms;
	HRESULT hr = d3ds.context->Map(d3d.readbackTexture, 0, D3D11_MAP_READ, NULL, &ms);
	if(FAILED(hr))
	{
		WriteInvalidImage(w, h, alignment, colorSpace, out);
		return;
	}

	if(colorSpace == CS_RGBA)
	{
		const byte* srcRow = (const byte*)ms.pData;
		byte* dstRow = (byte*)out + PAD(w * 4, alignment) * (h - 1);
		for(int y = 0; y < h; ++y)
		{
			const byte* s = srcRow;
			byte* d = dstRow;
			for(int x = 0; x < w; ++x)
			{
				d[0] = s[0];
				d[1] = s[1];
				d[2] = s[2];
				d[3] = 255;
				d += 4;
				s += 4;
			}

			srcRow += ms.RowPitch;
			dstRow -= PAD(w * 4, alignment);
		}
	}
	else if(colorSpace == CS_BGR)
	{
		const byte* srcRow = (const byte*)ms.pData;
		byte* dstRow = (byte*)out + PAD(w * 3, alignment) * (h - 1);
		for(int y = 0; y < h; ++y)
		{
			const byte* s = srcRow;
			byte* d = dstRow;
			for(int x = 0; x < w; ++x)
			{
				d[2] = s[0];
				d[1] = s[1];
				d[0] = s[2];
				d += 3;
				s += 4;
			}

			srcRow += ms.RowPitch;
			dstRow -= PAD(w * 3, alignment);
		}
	}

	d3ds.context->Unmap(d3d.readbackTexture, NULL);
}

// Creates the texture of an image in the next free slot of d3d.textures
// and stores the slot's index in image->texnum.
// Running out of slots is reported as a fatal error.
static void GAL_CreateTexture(image_t* image, int mipCount, int w, int h)
{
	if(ARRAY_LEN(d3d.textures) <= d3d.textureCount)
	{
		ri.Error(ERR_FATAL, "Too many textures allocated for the Direct3D 11 back-end");
	}

	CreateTexture(&d3d.textures[d3d.textureCount], image, mipCount, w, h);

	// the slot is only claimed once the texture has been set up
	image->texnum = d3d.textureCount;
	d3d.textureCount += 1;
}

// Uploads a rectangle of pixel data into a mip level of an image's texture.
// x, y, w, h: destination rectangle, in pixels, within the given mip level
// data: tightly packed rows with 4 bytes per pixel for TF_RGBA8 images
//       and 1 byte per pixel for all other formats
// Nothing happens when the image has no texture.
static void GAL_UpdateTexture(image_t* image, int mip, int x, int y, int w, int h, const void* data)
{
	ID3D11Texture2D* const dest = d3d.textures[image->texnum].texture;
	if(dest != NULL)
	{
		int rowBytes = w;
		if(image->format == TF_RGBA8)
		{
			rowBytes = w * 4;
		}
		const int imageBytes = rowBytes * h;

		// members: left, top, front, right, bottom, back
		const D3D11_BOX region = { (UINT)x, (UINT)y, 0, (UINT)(x + w), (UINT)(y + h), 1 };
		d3ds.context->UpdateSubresource(dest, mip, &region, data, rowBytes, imageBytes);
	}
}

// Updates the texture of a scratch image with new pixel data.
// When the dimensions changed, the texture is re-created with a single mip
// level before the upload. Otherwise, the data is only uploaded when 'dirty' is set.
// Images whose texnum is 0, negative or not a valid index into d3d.textures are ignored.
static void GAL_UpdateScratch(image_t* image, int w, int h, const void* data, qbool dirty)
{
	// @NOTE: ARRAY_LEN(d3d.textures) itself is already out of range,
	// hence >= and not > for the upper bound
	if(image->texnum <= 0 || image->texnum >= ARRAY_LEN(d3d.textures))
	{
		return;
	}

	if(w != image->width || h != image->height)
	{
		image->width = w;
		image->height = h;
		CreateTexture(&d3d.textures[image->texnum], image, 1, w, h);
		GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
	}
	else if(dirty)
	{
		GAL_UpdateTexture(image, 0, 0, 0, w, h, data);
	}
}

// Creates the texture of an image and generates its mip-maps with compute shaders.
// mipCount: number of mip levels of the source image, including mip 0
// mipOffset: number of top source mip levels that don't get stored,
//            i.e. source mip N ends up in destination mip N - mipOffset
// w, h: dimensions of the source mip 0 passed in 'mip0'
// mip0: pixel data of source mip 0 with 4 bytes per pixel
// The 3 scratch textures in d3d.mipGenTextures are used as follows:
// - 0 and 1 are the input/output of the gamma-to-linear and down-sampling passes
// - 2 receives the uploaded source data and the output of the linear-to-gamma pass,
//   which is what gets copied into the destination texture
// @NOTE: nothing here checks that w and h fit in the scratch textures - presumably guaranteed by the caller
static void GAL_CreateTextureEx(image_t* image, int mipCount, int mipOffset, int w, int h, const void* mip0)
{
	// thread group dimensions used to compute the dispatch sizes (rounded up)
	enum { GroupSize = 8, GroupMask = GroupSize - 1 };

	// needed so we don't bind a resource that's already bound
	ID3D11ShaderResourceView* const srvNull = NULL;
	ID3D11UnorderedAccessView* const uavNull = NULL;
	ID3D11Buffer* const bufferNull = NULL;

	// the destination texture uses the image's own dimensions, not w and h
	GAL_CreateTexture(image, mipCount - mipOffset, image->width, image->height);
	const Texture* const texture = &d3d.textures[image->texnum];

	// upload source mip 0
	const int rowBytes = w * 4;
	const int imageBytes = rowBytes * h;
	D3D11_BOX box;
	box.front = 0;
	box.back = 1;
	box.left = 0;
	box.right = w;
	box.top = 0;
	box.bottom = h;
	d3ds.context->UpdateSubresource(d3d.mipGenTextures[2].texture, 0, &box, mip0, rowBytes, imageBytes);

	GammaToLinearCSData dataG2L;
	dataG2L.gamma = r_mipGenGamma->value;

	// create a linear color space copy of source mip 0
	int readIndex = 2;
	int writeIndex = 0;
	ResetShaderData(d3d.mipGammaToLinearConstBuffer, &dataG2L, sizeof(dataG2L));
	d3ds.context->CSSetShader(d3d.mipGammaToLinearComputeShader, NULL, 0);
	// unbind everything first, then bind the new resources
	d3ds.context->CSSetConstantBuffers(0, 1, &bufferNull);
	d3ds.context->CSSetShaderResources(0, 1, &srvNull);
	d3ds.context->CSSetUnorderedAccessViews(0, 1, &uavNull, NULL);
	d3ds.context->CSSetConstantBuffers(0, 1, &d3d.mipGammaToLinearConstBuffer);
	d3ds.context->CSSetShaderResources(0, 1, &d3d.mipGenTextures[readIndex].srv);
	d3ds.context->CSSetUnorderedAccessViews(0, 1, &d3d.mipGenTextures[writeIndex].uav, NULL);
	d3ds.context->Dispatch((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

	// the blend color is filled in per mip level further down
	LinearToGammaCSData dataL2G;
	dataL2G.intensity = r_intensity->value;
	dataL2G.invGamma = 1.0f / r_mipGenGamma->value;

	// copy to destination mip 0 now if needed
	if(mipOffset == 0)
	{
		readIndex = 0;
		writeIndex = 2;
		memcpy(dataL2G.blendColor, r_mipBlendColors[0], sizeof(dataL2G.blendColor));
		ResetShaderData(d3d.mipLinearToGammaConstBuffer, &dataL2G, sizeof(dataL2G));
		d3ds.context->CSSetShader(d3d.mipLinearToGammaComputeShader, NULL, 0);
		d3ds.context->CSSetConstantBuffers(0, 1, &bufferNull);
		d3ds.context->CSSetShaderResources(0, 1, &srvNull);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &uavNull, NULL);
		d3ds.context->CSSetConstantBuffers(0, 1, &d3d.mipLinearToGammaConstBuffer);
		d3ds.context->CSSetShaderResources(0, 1, &d3d.mipGenTextures[readIndex].srv);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &d3d.mipGenTextures[writeIndex].uav, NULL);
		d3ds.context->Dispatch((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

		box.front = 0;
		box.back = 1;
		box.left = 0;
		box.right = w;
		box.top = 0;
		box.bottom = h;
		d3ds.context->CopySubresourceRegion(texture->texture, 0, 0, 0, 0, d3d.mipGenTextures[2].texture, 0, &box);
	}

	Down4CSData dataDown;
	memcpy(dataDown.weights, tr.mipFilter, sizeof(dataDown.weights));
	dataDown.clampMode = image->wrapClampMode == TW_REPEAT ? 0 : 1;

	// each iteration turns source mip i-1 (scratch texture 0) into source mip i (scratch texture 0 again)
	for(int i = 1; i < mipCount; ++i)
	{
		// w1, h1: dimensions of the previous mip level
		// w, h: dimensions of the mip level being generated, never smaller than 1
		const int w1 = w;
		const int h1 = h;
		w = max(w / 2, 1);
		h = max(h / 2, 1);
		
		// down-sample on the X-axis
		// output dimensions: w x h1
		readIndex = 0;
		writeIndex = 1;
		dataDown.scale[0] = w1 / w;
		dataDown.scale[1] = 1;
		dataDown.maxSize[0] = w1 - 1;
		dataDown.maxSize[1] = h1 - 1;
		dataDown.offset[0] = 1;
		dataDown.offset[1] = 0;
		ResetShaderData(d3d.mipDownSampleConstBuffer, &dataDown, sizeof(dataDown));
		d3ds.context->CSSetShader(d3d.mipDownSampleComputeShader, NULL, 0);
		d3ds.context->CSSetConstantBuffers(0, 1, &bufferNull);
		d3ds.context->CSSetShaderResources(0, 1, &srvNull);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &uavNull, NULL);
		d3ds.context->CSSetConstantBuffers(0, 1, &d3d.mipDownSampleConstBuffer);
		d3ds.context->CSSetShaderResources(0, 1, &d3d.mipGenTextures[readIndex].srv);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &d3d.mipGenTextures[writeIndex].uav, NULL);
		d3ds.context->Dispatch((w + GroupMask) / GroupSize, (h1 + GroupMask) / GroupSize, 1);

		// down-sample on the Y-axis
		// output dimensions: w x h
		// the shader and constant buffer bindings of the X-axis pass are kept,
		// only the buffer's content and the textures change
		readIndex = 1;
		writeIndex = 0;
		dataDown.scale[0] = 1;
		dataDown.scale[1] = h1 / h;
		dataDown.maxSize[0] = w - 1;
		dataDown.maxSize[1] = h1 - 1;
		dataDown.offset[0] = 0;
		dataDown.offset[1] = 1;
		ResetShaderData(d3d.mipDownSampleConstBuffer, &dataDown, sizeof(dataDown));
		d3ds.context->CSSetShaderResources(0, 1, &srvNull);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &uavNull, NULL);
		d3ds.context->CSSetShaderResources(0, 1, &d3d.mipGenTextures[readIndex].srv);
		d3ds.context->CSSetUnorderedAccessViews(0, 1, &d3d.mipGenTextures[writeIndex].uav, NULL);
		d3ds.context->Dispatch((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

		// source mips that come before mipOffset are generated but not stored
		const int destMip = i - mipOffset;
		if(destMip >= 0)
		{
			// convert to final format
			readIndex = 0;
			writeIndex = 2;
			// r_colorMipLevels selects a blend color per destination mip level
			memcpy(dataL2G.blendColor, r_mipBlendColors[r_colorMipLevels->integer ? destMip : 0], sizeof(dataL2G.blendColor));
			ResetShaderData(d3d.mipLinearToGammaConstBuffer, &dataL2G, sizeof(dataL2G));
			d3ds.context->CSSetShader(d3d.mipLinearToGammaComputeShader, NULL, 0);
			d3ds.context->CSSetConstantBuffers(0, 1, &bufferNull);
			d3ds.context->CSSetShaderResources(0, 1, &srvNull);
			d3ds.context->CSSetUnorderedAccessViews(0, 1, &uavNull, NULL);
			d3ds.context->CSSetConstantBuffers(0, 1, &d3d.mipLinearToGammaConstBuffer);
			d3ds.context->CSSetShaderResources(0, 1, &d3d.mipGenTextures[readIndex].srv);
			d3ds.context->CSSetUnorderedAccessViews(0, 1, &d3d.mipGenTextures[writeIndex].uav, NULL);
			d3ds.context->Dispatch((w + GroupMask) / GroupSize, (h + GroupMask) / GroupSize, 1);

			// write out the result
			box.front = 0;
			box.back = 1;
			box.left = 0;
			box.right = w;
			box.top = 0;
			box.bottom = h;
			d3ds.context->CopySubresourceRegion(texture->texture, destMip, 0, 0, 0, d3d.mipGenTextures[2].texture, 0, &box);
		}
	}
}

static void DrawGeneric()
{
	AppendVertexData(&d3d.indexBuffer, tess.indexes, tess.numIndexes);
	if(d3d.splitBufferOffsets)
	{
		AppendVertexData(&d3d.vertexBuffers[VB_POSITION], tess.xyz, tess.numVertexes);
	}

	for(int i = 0; i < tess.shader->numStages; ++i)
	{
		const shaderStage_t* stage = tess.xstages[i];

		if(d3d.splitBufferOffsets)
		{
			AppendVertexData(&d3d.vertexBuffers[VB_TEXCOORD], tess.svars[i].texcoordsptr, tess.numVertexes);
			AppendVertexData(&d3d.vertexBuffers[VB_COLOR], tess.svars[i].colors, tess.numVertexes);
			if(stage->mtStages == 1)
			{
				AppendVertexData(&d3d.vertexBuffers[VB_TEXCOORD2], tess.svars[i + 1].texcoordsptr, tess.numVertexes);
			}
		}
		else
		{
			const void* pointers[VB_COUNT];
			pointers[VB_POSITION] = tess.xyz;
			pointers[VB_NORMAL] = NULL;
			pointers[VB_TEXCOORD] = tess.svars[i].texcoordsptr;
			pointers[VB_TEXCOORD2] = stage->mtStages == 1 ? tess.svars[i + 1].texcoordsptr : NULL;
			pointers[VB_COLOR] = tess.svars[i].colors;
			AppendVertexDataGroup(pointers, tess.numVertexes);
		}

		ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);

		BindBundle(0, &stage->bundle);

		if(stage->mtStages == 1)
		{
			const shaderStage_t* stage2 = tess.xstages[i + 1];
			d3d.texEnv = stage2->mtEnv;
			BindBundle(1, &stage2->bundle);
			i += 1;
		}
		else
		{
			BindImage(1, tr.whiteImage);
			d3d.texEnv = TE_DISABLED;
		}

		UploadPendingShaderData();

		DrawIndexed(tess.numIndexes);
	}

	if(tess.drawFog)
	{
		if(d3d.splitBufferOffsets)
		{
			AppendVertexData(&d3d.vertexBuffers[VB_TEXCOORD], tess.svarsFog.texcoordsptr, tess.numVertexes);
			AppendVertexData(&d3d.vertexBuffers[VB_COLOR], tess.svarsFog.colors, tess.numVertexes);
		}
		else
		{
			const void* pointers[VB_COUNT];
			pointers[VB_POSITION] = tess.xyz;
			pointers[VB_NORMAL] = NULL;
			pointers[VB_TEXCOORD] = tess.svarsFog.texcoordsptr;
			pointers[VB_TEXCOORD2] = NULL;
			pointers[VB_COLOR] = tess.svarsFog.colors;
			AppendVertexDataGroup(pointers, tess.numVertexes);
		}

		ApplyState(tess.fogStateBits, tess.shader->cullType, tess.shader->polygonOffset);

		BindImage(0, tr.fogImage);
		BindImage(1, tr.whiteImage);

		d3d.texEnv = TE_DISABLED;
		UploadPendingShaderData();

		DrawIndexed(tess.numIndexes);
	}
}

static void DrawDynamicLight()
{
	const int stageIndex = tess.shader->lightingStages[ST_DIFFUSE];
	const shaderStage_t* stage = tess.xstages[stageIndex];

	AppendVertexData(&d3d.indexBuffer, tess.dlIndexes, tess.dlNumIndexes);
	if(d3d.splitBufferOffsets)
	{
		AppendVertexData(&d3d.vertexBuffers[VB_POSITION], tess.xyz, tess.numVertexes);
		AppendVertexData(&d3d.vertexBuffers[VB_NORMAL], tess.normal, tess.numVertexes);
		AppendVertexData(&d3d.vertexBuffers[VB_TEXCOORD], tess.svars[stageIndex].texcoordsptr, tess.numVertexes);
	}
	else
	{
		const void* pointers[VB_COUNT];
		pointers[VB_POSITION] = tess.xyz;
		pointers[VB_NORMAL] = tess.normal;
		pointers[VB_TEXCOORD] = tess.svars[stageIndex].texcoordsptr;
		pointers[VB_TEXCOORD2] = NULL;
		pointers[VB_COLOR] = NULL;
		AppendVertexDataGroup(pointers, tess.numVertexes);
	}

	ApplyState(backEnd.dlStateBits, tess.shader->cullType, tess.shader->polygonOffset);
	BindBundle(0, &stage->bundle);

	UploadPendingShaderData();

	DrawIndexed(tess.dlNumIndexes);
}

static void DrawDepthFade()
{
	AppendVertexData(&d3d.indexBuffer, tess.indexes, tess.numIndexes);
	if(d3d.splitBufferOffsets)
	{
		AppendVertexData(&d3d.vertexBuffers[VB_POSITION], tess.xyz, tess.numVertexes);
	}

	for(int i = 0; i < tess.shader->numStages; ++i)
	{
		const shaderStage_t* stage = tess.xstages[i];

		if(d3d.splitBufferOffsets)
		{
			AppendVertexData(&d3d.vertexBuffers[VB_TEXCOORD], tess.svars[i].texcoordsptr, tess.numVertexes);
			AppendVertexData(&d3d.vertexBuffers[VB_COLOR], tess.svars[i].colors, tess.numVertexes);
		}
		else
		{
			const void* pointers[VB_COUNT];
			pointers[VB_POSITION] = tess.xyz;
			pointers[VB_NORMAL] = NULL;
			pointers[VB_TEXCOORD] = tess.svars[i].texcoordsptr;
			pointers[VB_TEXCOORD2] = NULL;
			pointers[VB_COLOR] = tess.svars[i].colors;
			AppendVertexDataGroup(pointers, tess.numVertexes);
		}

		ApplyState(stage->stateBits, tess.shader->cullType, tess.shader->polygonOffset);

		BindBundle(0, &stage->bundle);

		UploadPendingShaderData();

		DrawIndexed(tess.numIndexes);
	}
}

// Selects the pipeline matching the draw type and draws the tessellated surface with it.
// Unknown draw types are ignored.
static void GAL_Draw(drawType_t type)
{
	switch(type)
	{
		case DT_GENERIC:
			ApplyPipeline(PID_GENERIC);
			DrawGeneric();
			break;

		case DT_DYNAMIC_LIGHT:
			ApplyPipeline(PID_DYNAMIC_LIGHT);
			DrawDynamicLight();
			break;

		case DT_SOFT_SPRITE:
			ApplyPipeline(PID_SOFT_SPRITE);
			DrawDepthFade();
			break;

		default:
			break;
	}
}

// Sets up 2D rendering: identity model-view matrix, orthographic projection,
// viewport and scissor rectangle covering the whole render area, default 2D state.
static void GAL_Begin2D()
{
	const int width = glConfig.vidWidth;
	const int height = glConfig.vidHeight;

	R_MakeIdentityMatrix(d3d.modelViewMatrix);
	R_MakeOrthoProjectionMatrix(d3d.projectionMatrix, width, height);
	ApplyViewportAndScissor(0, 0, width, height, height);
	ApplyState(GLS_DEFAULT_2D, CT_TWO_SIDED, qfalse);
}

// Clears the depth/stencil buffer and, optionally, the color buffer for the current view.
// A view covering the entire render area uses the API's clear functions.
// Any other view gets cleared by drawing with the clear pipeline,
// after which the generic pipeline is selected again.
static void ClearViews(qbool shouldClearColor, const FLOAT* clearColor)
{
	// Direct3D 11.1 does provide support for partial clears for color and depth-only views.
	// However, depth/stencil views are not supported so we can't use that right now.
	// Getting rid of the stencil buffer is definitely on the cards.

	const qbool coversEverything =
		backEnd.viewParms.viewportX == 0 &&
		backEnd.viewParms.viewportY == 0 &&
		backEnd.viewParms.viewportWidth == glConfig.vidWidth &&
		backEnd.viewParms.viewportHeight == glConfig.vidHeight;

	if(!coversEverything)
	{
		// partial clear: depth is always written,
		// color is blended in with an alpha of 1 (clear) or 0 (keep)
		const unsigned int clearStateBits =
			GLS_DEPTHMASK_TRUE | GLS_DEPTHFUNC_ALWAYS |
			GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA;
		ApplyPipeline(PID_CLEAR);
		ApplyState(clearStateBits, CT_TWO_SIDED, qfalse);
		for(int c = 0; c < 3; ++c)
		{
			d3d.clearPSData.color[c] = clearColor[c];
		}
		d3d.clearPSData.color[3] = shouldClearColor ? 1.0f : 0.0f;
		UploadPendingShaderData();
		d3ds.context->Draw(3, 0);
		ApplyPipeline(PID_GENERIC);
		return;
	}

	d3ds.context->ClearDepthStencilView(d3d.depthStencilView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, 1.0f, 0);
	if(shouldClearColor)
	{
		d3ds.context->ClearRenderTargetView(d3d.renderTargetViewMS, clearColor);
	}
}

static void GAL_Begin3D()
{
	ApplyPipeline(PID_GENERIC);
	memcpy(d3d.projectionMatrix, backEnd.viewParms.projectionMatrix, sizeof(d3d.projectionMatrix));
	ApplyViewportAndScissor(backEnd.viewParms.viewportX, backEnd.viewParms.viewportY, backEnd.viewParms.viewportWidth, backEnd.viewParms.viewportHeight, glConfig.vidHeight);

	qbool shouldClearColor = qfalse;
	FLOAT clearColor[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
	if(backEnd.refdef.rdflags & RDF_HYPERSPACE)
	{
		const FLOAT c = RB_HyperspaceColor();
		clearColor[0] = c;
		clearColor[1] = c;
		clearColor[2] = c;
		shouldClearColor = qtrue;
	}
	else if(r_fastsky->integer && !(backEnd.refdef.rdflags & RDF_NOWORLDMODEL))
	{
		shouldClearColor = qtrue;
	}
	ClearViews(shouldClearColor, clearColor);

	if(backEnd.viewParms.isPortal)
	{
		float plane[4];
		plane[0] = backEnd.viewParms.portalPlane.normal[0];
		plane[1] = backEnd.viewParms.portalPlane.normal[1];
		plane[2] = backEnd.viewParms.portalPlane.normal[2];
		plane[3] = backEnd.viewParms.portalPlane.dist;

		float plane2[4];
		plane2[0] = DotProduct(backEnd.viewParms.orient.axis[0], plane);
		plane2[1] = DotProduct(backEnd.viewParms.orient.axis[1], plane);
		plane2[2] = DotProduct(backEnd.viewParms.orient.axis[2], plane);
		plane2[3] = DotProduct(plane, backEnd.viewParms.orient.origin) - plane[3];

		float* o = plane;
		const float* m = s_flipMatrix;
		const float* v = plane2;
		o[0] = m[0] * v[0] + m[4] * v[1] + m[ 8] * v[2] + m[12] * v[3];
		o[1] = m[1] * v[0] + m[5] * v[1] + m[ 9] * v[2] + m[13] * v[3];
		o[2] = m[2] * v[0] + m[6] * v[1] + m[10] * v[2] + m[14] * v[3];
		o[3] = m[3] * v[0] + m[7] * v[1] + m[11] * v[2] + m[15] * v[3];

		memcpy(d3d.clipPlane, plane, sizeof(d3d.clipPlane));
	}
	else
	{
		const float clipPlane[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
		memcpy(d3d.clipPlane, clipPlane, sizeof(d3d.clipPlane));
	}

	ApplyState(GLS_DEFAULT, CT_TWO_SIDED, qfalse);
}

// Copies the given matrix into the back-end's model-view matrix.
// As many bytes as the destination matrix holds are read from 'matrix'.
static void GAL_SetModelViewMatrix(const float* matrix)
{
	void* const dest = d3d.modelViewMatrix;
	memcpy(dest, matrix, sizeof(d3d.modelViewMatrix));
}

// Changes the depth range of the current viewport and leaves its rectangle untouched.
// zNear: new minimum depth
// zFar: new maximum depth
// @NOTE: the arguments can't be called "near" and "far" because
// the Windows headers define those 2 names as (empty) macros
static void GAL_SetDepthRange(double zNear, double zFar)
{
	// read the current viewport back so that only the depth range gets changed
	D3D11_VIEWPORT viewport;
	UINT numVP = 1;
	d3ds.context->RSGetViewports(&numVP, &viewport);

	viewport.MinDepth = (float)zNear;
	viewport.MaxDepth = (float)zFar;
	d3ds.context->RSSetViewports(1, &viewport);
}

static void GAL_BeginDynamicLight()
{
	const dlight_t* const dl = tess.light;

	d3d.osEyePos[0] = backEnd.orient.viewOrigin[0];
	d3d.osEyePos[1] = backEnd.orient.viewOrigin[1];
	d3d.osEyePos[2] = backEnd.orient.viewOrigin[2];
	d3d.osEyePos[3] = 1.0f;
	d3d.osLightPos[0] = dl->transformed[0];
	d3d.osLightPos[1] = dl->transformed[1];
	d3d.osLightPos[2] = dl->transformed[2];
	d3d.osLightPos[3] = 1.0f;
	d3d.lightColor[0] = dl->color[0];
	d3d.lightColor[1] = dl->color[1];
	d3d.lightColor[2] = dl->color[2];
	d3d.lightRadius = 1.0f / Square(dl->radius);
}

// Prints the back-end's configuration: feature level, vertex buffer upload strategy,
// presentation model and, when the adapter info is valid, the adapter's memory sizes.
static void GAL_PrintInfo()
{
	// anything that isn't 11.0 is reported as 10.1
	const char* const featureLevel = d3ds.device->GetFeatureLevel() == D3D_FEATURE_LEVEL_11_0 ? "11.0" : "10.1";
	const char* const uploadStrategy = d3d.splitBufferOffsets ? "split offsets" : "sync'd offsets";
	const char* const presentModel = d3ds.flipAndTear ? "flip + discard" : "blit + discard";

	ri.Printf(PRINT_ALL, "Direct3D device feature level: %s\n", featureLevel);
	ri.Printf(PRINT_ALL, "Direct3D vertex buffer upload strategy: %s\n", uploadStrategy);
	ri.Printf(PRINT_ALL, "DXGI presentation model: %s\n", presentModel);

	if(!d3ds.adapterInfo.valid)
	{
		return;
	}

	ri.Printf(PRINT_ALL, "%6d MB of dedicated GPU memory\n", d3ds.adapterInfo.dedicatedVideoMemoryMB);
	ri.Printf(PRINT_ALL, "%6d MB of shared system memory\n", d3ds.adapterInfo.sharedSystemMemoryMB);
	ri.Printf(PRINT_ALL, "%6d MB of dedicated system memory\n", d3ds.adapterInfo.dedicatedSystemMemoryMB);
}

// Fills the graphics API layer's function table with the Direct3D 11 implementation.
// Always returns qtrue.
qbool GAL_GetD3D11(graphicsAPILayer_t* rb)
{
	// life time
	rb->Init = GAL_Init;
	rb->ShutDown = GAL_ShutDown;

	// sky, read-back and frame boundaries
	rb->BeginSkyAndClouds = GAL_BeginSkyAndClouds;
	rb->EndSkyAndClouds = GAL_EndSkyAndClouds;
	rb->ReadPixels = GAL_ReadPixels;
	rb->BeginFrame = GAL_BeginFrame;
	rb->EndFrame = GAL_EndFrame;

	// textures
	rb->CreateTexture = GAL_CreateTexture;
	rb->UpdateTexture = GAL_UpdateTexture;
	rb->UpdateScratch = GAL_UpdateScratch;
	rb->CreateTextureEx = GAL_CreateTextureEx;

	// drawing and render state
	rb->Draw = GAL_Draw;
	rb->Begin2D = GAL_Begin2D;
	rb->Begin3D = GAL_Begin3D;
	rb->SetModelViewMatrix = GAL_SetModelViewMatrix;
	rb->SetDepthRange = GAL_SetDepthRange;
	rb->BeginDynamicLight = GAL_BeginDynamicLight;

	// diagnostics
	rb->PrintInfo = GAL_PrintInfo;

	return qtrue;
}


#else


#include "tr_local.h"


// Direct3D 11 is only available on Windows:
// the function table is left untouched and qfalse is returned.
qbool GAL_GetD3D11(graphicsAPILayer_t*)
{
	return qfalse;
}


#endif