/*
===========================================================================

Doom 3 BFG Edition GPL Source Code
Copyright (C) 1993-2012 id Software LLC, a ZeniMax Media company.

This file is part of the Doom 3 BFG Edition GPL Source Code ("Doom 3 BFG Edition Source Code").

Doom 3 BFG Edition Source Code is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

Doom 3 BFG Edition Source Code is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Doom 3 BFG Edition Source Code.  If not, see <http://www.gnu.org/licenses/>.

In addition, the Doom 3 BFG Edition Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 BFG Edition Source Code.  If not, please request a copy in writing from id Software at the address below.

If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.

===========================================================================
*/
/*
================================================================================================
Contains the DxtEncoder implementation.
================================================================================================
*/

#pragma hdrstop
#include "DXTCodec_local.h"
#include "DXTCodec.h"

#include <cmath>

// Shift amounts for insetting a bounding box by ( range >> shift ).
#define INSET_COLOR_SHIFT		4		// inset the bounding box with ( range >> shift )
#define INSET_ALPHA_SHIFT		5		// inset alpha channel

// Bit masks for an 8-bit channel that clear the low bits a 5-bit or 6-bit field cannot hold.
#define C565_5_MASK				0xF8	// 0xFF minus last three bits
#define C565_6_MASK				0xFC	// 0xFF minus last two bits

#define NVIDIA_7X_HARDWARE_BUG_FIX		// keep the DXT5 colors sorted as: max, min

// Short local aliases for the project's 16-bit and 32-bit integer types.
typedef uint16	word;
typedef uint32	dword;

/*
========================
idDxtEncoder::NV4XHardwareBugFix

Exchanges the RGB bytes of minColor and maxColor when the 5:6:5 quantized minColor compares
greater than the 5:6:5 quantized maxColor. The fourth byte of each color is never modified.

The previous implementation loaded and stored both colors through int pointers. That violates
strict aliasing, requires 4 byte aligned inputs and only swaps the intended three bytes on
little-endian targets. The byte-wise swap below gives the same result on little-endian targets
without any of those assumptions.

params:	minColor	- in/out color, at least 3 bytes (RGB)
params:	maxColor	- in/out color, at least 3 bytes (RGB)
========================
*/
void idDxtEncoder::NV4XHardwareBugFix( byte* minColor, byte* maxColor ) const
{
	// pack RGB into one integer and drop the bits that do not survive 5:6:5 quantization
	const int minq = ( ( minColor[0] << 16 ) | ( minColor[1] << 8 ) | minColor[2] ) & 0x00F8FCF8;
	const int maxq = ( ( maxColor[0] << 16 ) | ( maxColor[1] << 8 ) | maxColor[2] ) & 0x00F8FCF8;
	
	if( minq <= maxq )
	{
		// already ordered, nothing to do
		return;
	}
	
	// swap the three color bytes, leave the fourth byte alone
	for( int i = 0; i < 3; i++ )
	{
		const byte tmp = minColor[i];
		minColor[i] = maxColor[i];
		maxColor[i] = tmp;
	}
}

/*
========================
idDxtEncoder::HasConstantValuePer4x4Block
========================
*/
bool idDxtEncoder::HasConstantValuePer4x4Block( const byte* inBuf, int width, int height, int channel ) const
{
	if( width < 4 || height < 4 )
	{
		byte value = inBuf[channel];
		for( int k = 0; k < height; k++ )
		{
			for( int l = 0; l < width; l++ )
			{
				if( inBuf[( k * width + l ) * 4 + channel] != value )
				{
					return false;
				}
			}
		}
		return true;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
			const byte* inPtr = inBuf + i * 4;
			byte value = inPtr[channel];
			for( int k = 0; k < 4; k++ )
			{
				for( int l = 0; l < 4; l++ )
				{
					if( inPtr[( k * width + l ) * 4 + channel] != value )
					{
						return false;
					}
				}
			}
		}
		inBuf += srcPadding;
	}
	return true;
}

/*
========================
idDxtEncoder::WriteTinyColorDXT1
========================
*/
void idDxtEncoder::WriteTinyColorDXT1( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the color at position 0,0
		EmitUShort( ColorTo565( inBuf ) );
		EmitUShort( 0 );	// dummy, never used
		EmitUInt( 0 );		// 4 color index bytes all use the first color
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::WriteTinyColorDXT5
========================
*/
void idDxtEncoder::WriteTinyColorDXT5( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the color at position 0,0
		EmitByte( inBuf[3] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		EmitUShort( ColorTo565( inBuf ) );
		EmitUShort( 0 );	// dummy, never used
		EmitUInt( 0 );		// 4 color index bytes all use the first color
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::WriteTinyColorCTX1DXT5A
========================
*/
void idDxtEncoder::WriteTinyColorCTX1DXT5A( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the color at position 0,0
		EmitByte( inBuf[0] );
		EmitByte( inBuf[1] );
		EmitByte( inBuf[0] );
		EmitByte( inBuf[1] );
		EmitUInt( 0 );		// 4 color index bytes all use the first color
		
		EmitByte( inBuf[3] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::WriteTinyNormalMapDXT5
========================
*/
void idDxtEncoder::WriteTinyNormalMapDXT5( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the normal at position 0,0
		EmitByte( inBuf[3] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		EmitUShort( ColorTo565( inBuf[0], inBuf[1], inBuf[2] ) );
		EmitUShort( 0 );	// dummy, never used
		EmitUInt( 0 );		// 4 color index bytes all use the first color
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::WriteTinyNormalMapDXN
========================
*/
void idDxtEncoder::WriteTinyNormalMapDXN( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the normal at position 0,0
		EmitByte( inBuf[0] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		EmitByte( inBuf[1] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::WriteTinyDXT5A
========================
*/
void idDxtEncoder::WriteTinyDXT5A( const byte* inBuf, int width, int height )
{
	int numBlocks = ( ( width + 3 ) / 4 ) * ( ( height + 3 ) / 4 );
	int stride = ( ( width * height ) / numBlocks ) * 4;	// number of bytes from one block to the next
	// example: 2x8 pixels
	// numBlocks = 2
	// stride = 32 bytes (8 pixels)
	
	for( int i = 0; i < numBlocks; i++ )
	{
		// FIXME: This just emits a fake block based on the normal at position 0,0
		EmitByte( inBuf[0] );
		EmitByte( 0 );		// dummy, never used
		EmitByte( 0 );		// 6 alpha index bytes all use the first alpha
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		EmitByte( 0 );
		
		inBuf += stride;
	}
}

/*
========================
idDxtEncoder::ExtractBlock

Copies a 4x4 pixel tile out of the image into a contiguous 64 byte buffer.

params:	inPtr		- first pixel of the tile inside the input image, 4 bytes per pixel
params:	width		- width of the input image in pixels
paramO:	colorBlock	- 4*4 output tile, 4 bytes per pixel
========================
*/
ID_INLINE void idDxtEncoder::ExtractBlock( const byte* inPtr, int width, byte* colorBlock ) const
{
	const int tileRowBytes = 4 * 4;		// 4 pixels, 4 bytes each
	const int imageRowBytes = width * 4;
	
	const byte* src = inPtr;
	byte* dst = colorBlock;
	for( int row = 0; row < 4; row++, src += imageRowBytes, dst += tileRowBytes )
	{
		memcpy( dst, src, tileRowBytes );
	}
}

/*
========================
SwapColors

Exchanges the first three bytes of two colors. Any further bytes are left untouched.
========================
*/
void SwapColors( byte* c1, byte* c2 )
{
	for( int channel = 0; channel < 3; channel++ )
	{
		const byte saved = c1[channel];
		c1[channel] = c2[channel];
		c2[channel] = saved;
	}
}

/*
========================
idDxtEncoder::GetMinMaxColorsMaxDist

Finds the two RGB colors in a 4x4 block furthest apart. Also finds the two alpha values
furthest apart.

params: colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte min color
paramO:	maxColor	- 4 byte max color
========================
*/
void idDxtEncoder::GetMinMaxColorsMaxDist( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	int maxDistC = -1;
	int maxDistA = -1;
	
	for( int i = 0; i < 64 - 4; i += 4 )
	{
		for( int j = i + 4; j < 64; j += 4 )
		{
			int dc = ColorDistance( &colorBlock[i], &colorBlock[j] );
			if( dc > maxDistC )
			{
				maxDistC = dc;
				memcpy( minColor, colorBlock + i, 3 );
				memcpy( maxColor, colorBlock + j, 3 );
			}
			int da = AlphaDistance( colorBlock[i + 3], colorBlock[j + 3] );
			if( da > maxDistA )
			{
				maxDistA = da;
				minColor[3] = colorBlock[i + 3];
				maxColor[3] = colorBlock[j + 3];
			}
		}
	}
	if( maxColor[0] < minColor[0] )
	{
		SwapColors( minColor, maxColor );
	}
}

/*
========================
idDxtEncoder::GetMinMaxColorsLuminance

Finds the two RGB colors in a 4x4 block furthest apart based on luminance. Also finds the two
alpha values furthest apart.

params: colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte min color
paramO:	maxColor	- 4 byte max color
========================
*/
void idDxtEncoder::GetMinMaxColorsLuminance( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	int maxLumC = 0, minLumC = 256 * 4;
	int maxAlpha = 0, minAlpha = 256 * 4;
	
	for( int i = 0; i < 16; i++ )
	{
		int luminance = colorBlock[i * 4 + 0] + colorBlock[i * 4 + 1] * 2 + colorBlock[i * 4 + 2];
		if( luminance > maxLumC )
		{
			maxLumC = luminance;
			memcpy( maxColor, colorBlock + i * 4, 3 );
		}
		if( luminance < minLumC )
		{
			minLumC = luminance;
			memcpy( minColor, colorBlock + i * 4, 3 );
		}
		int alpha = colorBlock[i * 4 + 3];
		if( alpha > maxAlpha )
		{
			maxAlpha = alpha;
			maxColor[3] = ( byte )alpha;
		}
		if( alpha < minAlpha )
		{
			minAlpha = alpha;
			minColor[3] = ( byte )alpha;
		}
	}
	if( maxColor[0] < minColor[0] )
	{
		SwapColors( minColor, maxColor );
	}
}

/*
========================
idDxtEncoder::GetSquareAlphaError

params:	colorBlock	- 16 pixel block for which to find color indexes
paramO:	minAlpha	- Min alpha found
paramO:	maxAlpha	- Max alpha found
return: 4 byte color index block
========================
*/
int idDxtEncoder::GetSquareAlphaError( const byte* colorBlock, const int alphaOffset, const byte minAlpha, const byte maxAlpha, int lastError ) const
{
	int i, j;
	byte alphas[8];
	
	alphas[0] = maxAlpha;
	alphas[1] = minAlpha;
	
	if( maxAlpha > minAlpha )
	{
		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
	}
	else
	{
		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
		alphas[6] = 0;
		alphas[7] = 255;
	}
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		byte a = colorBlock[i * 4 + alphaOffset];
		for( j = 0; j < 8; j++ )
		{
			unsigned int dist = AlphaDistance( a, alphas[j] );
			if( dist < minDist )
			{
				minDist = dist;
			}
		}
		error += minDist;
		
		if( error >= lastError )
		{
			return error;
		}
	}
	
	return error;
}

/*
========================
idDxtEncoder::GetMinMaxAlphaHQ

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor		- 4 byte min color found
paramO:	maxColor		- 4 byte max color found
========================
*/
int idDxtEncoder::GetMinMaxAlphaHQ( const byte* colorBlock, const int alphaOffset, byte* minColor, byte* maxColor ) const
{
	int i, j;
	byte alphaMin, alphaMax;
	int error, bestError = MAX_TYPE( int );
	
	alphaMin = 255;
	alphaMax = 0;
	
	// get alpha min / max
	for( i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + alphaOffset] < alphaMin )
		{
			alphaMin = colorBlock[i * 4 + alphaOffset];
		}
		if( colorBlock[i * 4 + alphaOffset] > alphaMax )
		{
			alphaMax = colorBlock[i * 4 + alphaOffset];
		}
	}
	
	const int ALPHA_EXPAND = 32;
	
	alphaMin = ( alphaMin <= ALPHA_EXPAND ) ? 0 : alphaMin - ALPHA_EXPAND;
	alphaMax = ( alphaMax >= 255 - ALPHA_EXPAND ) ? 255 : alphaMax + ALPHA_EXPAND;
	
	for( i = alphaMin; i <= alphaMax; i++ )
	{
		for( j = alphaMax; j >= i; j-- )
		{
		
			error = GetSquareAlphaError( colorBlock, alphaOffset, ( byte )i, ( byte )j, bestError );
			if( error < bestError )
			{
				bestError = error;
				minColor[alphaOffset] = ( byte )i;
				maxColor[alphaOffset] = ( byte )j;
			}
			
			error = GetSquareAlphaError( colorBlock, alphaOffset, ( byte )j, ( byte )i, bestError );
			if( error < bestError )
			{
				bestError = error;
				minColor[alphaOffset] = ( byte )i;
				maxColor[alphaOffset] = ( byte )j;
			}
		}
	}
	
	return bestError;
}

/*
========================
idDxtEncoder::GetSquareColorsError

params:	colorBlock	- 16 pixel block for which to find color indexes
paramO:	color0		- 4 byte min color found
paramO:	color1		- 4 byte max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::GetSquareColorsError( const byte* colorBlock, const unsigned short color0, const unsigned short color1, int lastError ) const
{
	int i, j;
	byte colors[4][4];
	
	ColorFrom565( color0, colors[0] );
	ColorFrom565( color1, colors[1] );
	
	if( color0 > color1 )
	{
		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	}
	else
	{
		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
		colors[3][0] = 0;
		colors[3][1] = 0;
		colors[3][2] = 0;
	}
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		for( j = 0; j < 4; j++ )
		{
			unsigned int dist = ColorDistance( &colorBlock[i * 4], &colors[j][0] );
			if( dist < minDist )
			{
				minDist = dist;
			}
		}
		// accumulated error
		error += minDist;
		
		if( error > lastError )
		{
			return error;
		}
	}
	return error;
}

/*
========================
idDxtEncoder::GetSquareNormalYError

params:	colorBlock	- 16 pixel block for which to find color indexes
paramO:	color0		- 4 byte min color found
paramO:	color1		- 4 byte max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::GetSquareNormalYError( const byte* colorBlock, const unsigned short color0, const unsigned short color1, int lastError, int scale ) const
{
	int i, j;
	byte colors[4][4];
	
	ColorFrom565( color0, colors[0] );
	ColorFrom565( color1, colors[1] );
	
	if( color0 > color1 )
	{
		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	}
	else
	{
		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
		colors[3][0] = 0;
		colors[3][1] = 0;
		colors[3][2] = 0;
	}
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		for( j = 0; j < 4; j++ )
		{
			float r = ( float ) colorBlock[i * 4 + 1] / scale;
			float s = ( float ) colors[j][1] / scale;
			unsigned int dist = idMath::Ftoi( ( r - s ) * ( r - s ) );
			if( dist < minDist )
			{
				minDist = dist;
			}
		}
		// accumulated error
		error += minDist;
		
		if( error > lastError )
		{
			return error;
		}
	}
	return error;
}

/*
========================
idDxtEncoder::GetMinMaxColorsHQ

Uses an exhaustive search to find the two 5:6:5 colors that produce the least
GetSquareColorsError when used as end points for the 4x4 block. The search covers the RGB
bounding box of the block in 5:6:5 space, expanded by one step per channel. Only the RGB
channels are examined.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- receives the first best end point, converted with ColorFrom565
paramO:	maxColor	- receives the second best end point, converted with ColorFrom565
params:	noBlack		- when true only the ordering with color0 >= color1 is evaluated
return: lowest error found
========================
*/
int idDxtEncoder::GetMinMaxColorsHQ( const byte* colorBlock, byte* minColor, byte* maxColor, bool noBlack ) const
{
	int i;
	int i0, i1, i2, j0, j1, j2;
	unsigned short minColor565, maxColor565, bestMinColor565, bestMaxColor565;
	byte bboxMin[3], bboxMax[3], minAxisDist[3];
	int error, bestError = MAX_TYPE( int );
	
	bboxMin[0] = bboxMin[1] = bboxMin[2] = 255;
	bboxMax[0] = bboxMax[1] = bboxMax[2] = 0;
	
	// get color bbox
	for( i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 0] < bboxMin[0] )
		{
			bboxMin[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] < bboxMin[2] )
		{
			bboxMin[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 0] > bboxMax[0] )
		{
			bboxMax[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] > bboxMax[2] )
		{
			bboxMax[2] = colorBlock[i * 4 + 2];
		}
	}
	
	// decrease range for 565 encoding
	// ( channels 0 and 2 keep 5 bits, channel 1 keeps 6 bits )
	bboxMin[0] >>= 3;
	bboxMin[1] >>= 2;
	bboxMin[2] >>= 3;
	bboxMax[0] >>= 3;
	bboxMax[1] >>= 2;
	bboxMax[2] >>= 3;
	
	// get the minimum distance the end points of the line must be apart along each axis
	// ( candidates closer together than this are skipped in the search below; the wider the
	// box along an axis, the larger the required fraction of its extent )
	for( i = 0; i < 3; i++ )
	{
		minAxisDist[i] = ( bboxMax[i] - bboxMin[i] );
		if( minAxisDist[i] >= 16 )
		{
			minAxisDist[i] = minAxisDist[i] * 3 / 4;
		}
		else if( minAxisDist[i] >= 8 )
		{
			minAxisDist[i] = minAxisDist[i] * 2 / 4;
		}
		else if( minAxisDist[i] >= 4 )
		{
			minAxisDist[i] = minAxisDist[i] * 1 / 4;
		}
		else
		{
			minAxisDist[i] = 0;
		}
	}
	
	// expand the bounding box
	// ( by C565_BBOX_EXPAND quantized steps per side, clamped to 0..31 for channels 0 and 2
	// and to 0..63 for channel 1 )
	const int C565_BBOX_EXPAND = 1;
	
	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
	bboxMax[0] = ( bboxMax[0] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[0] + C565_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= ( 255 >> 2 ) - C565_BBOX_EXPAND ) ? ( 255 >> 2 ) : bboxMax[1] + C565_BBOX_EXPAND;
	bboxMax[2] = ( bboxMax[2] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[2] + C565_BBOX_EXPAND;
	
	bestMinColor565 = 0;
	bestMaxColor565 = 0;
	
	// exhaustive search: ( i0, i1, i2 ) and ( j0, j1, j2 ) each walk the whole expanded box,
	// the i* values ascending and the j* values descending
	for( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ )
	{
		for( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- )
		{
			if( abs( i0 - j0 ) < minAxisDist[0] )
			{
				continue;
			}
			
			for( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
			{
				for( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
				{
					if( abs( i1 - j1 ) < minAxisDist[1] )
					{
						continue;
					}
					
					for( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ )
					{
						for( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- )
						{
							if( abs( i2 - j2 ) < minAxisDist[2] )
							{
								continue;
							}
							
							// pack the two candidates as 5:6:5 values
							minColor565 = ( unsigned short )( ( i0 << 11 ) | ( i1 << 5 ) | ( i2 << 0 ) );
							maxColor565 = ( unsigned short )( ( j0 << 11 ) | ( j1 << 5 ) | ( j2 << 0 ) );
							
							if( !noBlack )
							{
								// evaluate the pair with the end points passed as ( max, min );
								// the ( min, max ) order is evaluated below
								error = GetSquareColorsError( colorBlock, maxColor565, minColor565, bestError );
								if( error < bestError )
								{
									bestError = error;
									bestMinColor565 = minColor565;
									bestMaxColor565 = maxColor565;
								}
							}
							else
							{
								// order the pair so that the evaluation below gets color0 >= color1;
								// after a swap minColor565 holds the numerically larger value
								if( minColor565 <= maxColor565 )
								{
									SwapValues( minColor565, maxColor565 );
								}
							}
							
							error = GetSquareColorsError( colorBlock, minColor565, maxColor565, bestError );
							if( error < bestError )
							{
								bestError = error;
								bestMinColor565 = minColor565;
								bestMaxColor565 = maxColor565;
							}
						}
					}
				}
			}
		}
	}
	
	// convert the best pair back from 5:6:5 into the output colors
	ColorFrom565( bestMinColor565, minColor );
	ColorFrom565( bestMaxColor565, maxColor );
	
	return bestError;
}

/*
========================
idDxtEncoder::GetSquareCTX1Error

params:	colorBlock	- 16 pixel block for which to find color indexes
paramO:	color0		- Min color found
paramO:	color1		- Max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::GetSquareCTX1Error( const byte* colorBlock, const byte* color0, const byte* color1, int lastError ) const
{
	int i, j;
	byte colors[4][4];
	
	colors[0][0] = color0[0];
	colors[0][1] = color0[1];
	colors[1][0] = color1[0];
	colors[1][1] = color1[1];
	
	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		for( j = 0; j < 4; j++ )
		{
			unsigned int dist = CTX1Distance( &colorBlock[i * 4], &colors[j][0] );
			if( dist < minDist )
			{
				minDist = dist;
			}
		}
		// accumulated error
		error += minDist;
		
		if( error > lastError )
		{
			return error;
		}
	}
	return error;
}

/*
========================
idDxtEncoder::GetMinMaxCTX1HQ

Uses an exhaustive search to find the two RGB colors that produce the least error when used to
compress the 4x4 block. Also finds the minimum and maximum alpha values.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte Min color found
paramO:	maxColor	- 4 byte Max color found
========================
*/
int idDxtEncoder::GetMinMaxCTX1HQ( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	int i;
	int i0, i1, j0, j1;
	byte curMinColor[2], curMaxColor[2];
	byte bboxMin[2], bboxMax[2], minAxisDist[2];
	int error, bestError = MAX_TYPE( int );
	
	bboxMin[0] = bboxMin[1] = 255;
	bboxMax[0] = bboxMax[1] = 0;
	
	// get color bbox
	for( i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 0] < bboxMin[0] )
		{
			bboxMin[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 0] > bboxMax[0] )
		{
			bboxMax[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
	}
	
	// get the minimum distance the end points of the line must be apart along each axis
	for( i = 0; i < 2; i++ )
	{
		minAxisDist[i] = ( bboxMax[i] - bboxMin[i] );
		if( minAxisDist[i] >= 64 )
		{
			minAxisDist[i] = minAxisDist[i] * 3 / 4;
		}
		else if( minAxisDist[i] >= 32 )
		{
			minAxisDist[i] = minAxisDist[i] * 2 / 4;
		}
		else if( minAxisDist[i] >= 16 )
		{
			minAxisDist[i] = minAxisDist[i] * 1 / 4;
		}
		else
		{
			minAxisDist[i] = 0;
		}
	}
	
	// expand the bounding box
	const int CXT1_BBOX_EXPAND = 6;
	
	bboxMin[0] = ( bboxMin[0] <= CXT1_BBOX_EXPAND ) ? 0 : bboxMin[0] - CXT1_BBOX_EXPAND;
	bboxMin[1] = ( bboxMin[1] <= CXT1_BBOX_EXPAND ) ? 0 : bboxMin[1] - CXT1_BBOX_EXPAND;
	bboxMax[0] = ( bboxMax[0] >= 255 - CXT1_BBOX_EXPAND ) ? 255 : bboxMax[0] + CXT1_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= 255 - CXT1_BBOX_EXPAND ) ? 255 : bboxMax[1] + CXT1_BBOX_EXPAND;
	
	for( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ )
	{
		for( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- )
		{
			if( abs( i0 - j0 ) < minAxisDist[0] )
			{
				continue;
			}
			
			for( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
			{
				for( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
				{
					if( abs( i1 - j1 ) < minAxisDist[1] )
					{
						continue;
					}
					
					curMinColor[0] = ( byte )i0;
					curMinColor[1] = ( byte )i1;
					
					curMaxColor[0] = ( byte )j0;
					curMaxColor[1] = ( byte )j1;
					
					error = GetSquareCTX1Error( colorBlock, curMinColor, curMaxColor, bestError );
					if( error < bestError )
					{
						bestError = error;
						memcpy( minColor, curMinColor, 2 );
						memcpy( maxColor, curMaxColor, 2 );
					}
				}
			}
		}
	}
	
	return bestError;
}

/*
========================
idDxtEncoder::GetMinMaxNormalYHQ

Uses an exhaustive search to find the two RGB colors that produce the least error when used to
compress the 4x4 block. Also finds the minimum and maximum alpha values.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte Min color found
paramO:	maxColor	- 4 byte Max color found
========================
*/
int idDxtEncoder::GetMinMaxNormalYHQ( const byte* colorBlock, byte* minColor, byte* maxColor, bool noBlack, int scale ) const
{
	unsigned short bestMinColor565, bestMaxColor565;
	byte bboxMin[3], bboxMax[3];
	int error, bestError = MAX_TYPE( int );
	
	bboxMin[1] = 255;
	bboxMax[1] = 0;
	
	// get color bbox
	for( int i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
	}
	
	// decrease range for 565 encoding
	bboxMin[1] >>= 2;
	bboxMax[1] >>= 2;
	
	// expand the bounding box
	const int C565_BBOX_EXPAND = 1;
	
	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= ( 255 >> 2 ) - C565_BBOX_EXPAND ) ? ( 255 >> 2 ) : bboxMax[1] + C565_BBOX_EXPAND;
	
	bestMinColor565 = 0;
	bestMaxColor565 = 0;
	
	for( int i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
	{
		for( int j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
		{
			if( abs( i1 - j1 ) < 0 )
			{
				continue;
			}
			
			unsigned short minColor565 = ( unsigned short )i1 << 5;
			unsigned short maxColor565 = ( unsigned short )j1 << 5;
			
			if( !noBlack )
			{
				error = GetSquareNormalYError( colorBlock, maxColor565, minColor565, bestError, scale );
				if( error < bestError )
				{
					bestError = error;
					bestMinColor565 = minColor565;
					bestMaxColor565 = maxColor565;
				}
			}
			else
			{
				if( minColor565 <= maxColor565 )
				{
					SwapValues( minColor565, maxColor565 );
				}
			}
			
			error = GetSquareNormalYError( colorBlock, minColor565, maxColor565, bestError, scale );
			if( error < bestError )
			{
				bestError = error;
				bestMinColor565 = minColor565;
				bestMaxColor565 = maxColor565;
			}
		}
	}
	
	ColorFrom565( bestMinColor565, minColor );
	ColorFrom565( bestMaxColor565, maxColor );
	
	int bias = colorBlock[0 * 4 + 0];
	int size = colorBlock[0 * 4 + 2];
	
	minColor[0] = maxColor[0] = ( byte )bias;
	minColor[2] = maxColor[2] = ( byte )size;
	
	return bestError;
}

// Constants referenced by the inline assembly in NormalDistanceDXT1 and NormalDistanceDXT5.
// Each one holds the same value in all four lanes unless noted otherwise.
ALIGN16( static float SIMD_SSE2_float_scale[4] ) = { 2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f, 2.0f / 255.0f };
ALIGN16( static float SIMD_SSE2_float_descale[4] ) = { 255.0f / 2.0f, 255.0f / 2.0f, 255.0f / 2.0f, 255.0f / 2.0f };
ALIGN16( static float SIMD_SSE2_float_zero[4] ) = { 0.0f, 0.0f, 0.0f, 0.0f };
ALIGN16( static float SIMD_SSE2_float_one[4] ) = { 1.0f, 1.0f, 1.0f, 1.0f };
ALIGN16( static float SIMD_SSE2_float_half[4] ) = { 0.5f, 0.5f, 0.5f, 0.5f };
ALIGN16( static float SIMD_SSE2_float_255[4] ) = { 255.0f, 255.0f, 255.0f, 255.0f };
// applied right after rsqrtps as ( x * r * r - c0 ) * ( r * c1 ) to refine the estimate r
ALIGN16( static float SIMD_SP_rsqrt_c0[4] ) = { 3.0f, 3.0f, 3.0f, 3.0f };
ALIGN16( static float SIMD_SP_rsqrt_c1[4] ) = { -0.5f, -0.5f, -0.5f, -0.5f };
// keeps the first three lanes and clears the fourth
ALIGN16( static dword SIMD_SSE2_dword_maskFirstThree[4] ) = { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 };
// keeps the low 16 bits of the first three lanes and clears the fourth
ALIGN16( static dword SIMD_SSE2_dword_maskWords[4] ) = { 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x00000000 };
// builds the 8 bit shuffle immediate from four 2 bit lane selectors, x in the lowest bits
#define R_SHUFFLE_PS( x, y, z, w )	(( (w) & 3 ) << 6 | ( (z) & 3 ) << 4 | ( (y) & 3 ) << 2 | ( (x) & 3 ))

/*
========================
NormalDistanceDXT1

Interprets the first three components of vector as a normal stored with 0..255 per component,
renormalizes it, converts it back to 0..255 and returns the sum of the squared differences to
the first three components of normalized. The fourth component of both inputs is ignored.

On 32 bit x86 MSVC builds this is done with SSE2 inline assembly, which loads both inputs as
four ints. All other builds use the scalar code below, which follows the same steps but uses
an exact square root instead of the refined rsqrtps estimate. These builds used to return 0,
which made every candidate look like a perfect match.

params:	vector		- 4 ints, candidate normal, components expected in 0..255
params:	normalized	- 4 ints, reference normal, components expected in 0..255
return: sum of squared per component differences
========================
*/
int NormalDistanceDXT1( const int* vector, const int* normalized )
{
#if defined(_MSC_VER) && defined(_M_IX86)
	int result;
	__asm
	{
		mov			esi, vector
		mov			edi, normalized
		cvtdq2ps	xmm0, [esi]
		mulps		xmm0, SIMD_SSE2_float_scale
		subps		xmm0, SIMD_SSE2_float_one
		pand		xmm0, SIMD_SSE2_dword_maskFirstThree
		movaps		xmm1, xmm0
		mulps		xmm1, xmm1
		pshufd		xmm2, xmm1, R_SHUFFLE_PS( 2, 3, 0, 1 )
		addps		xmm2, xmm1
		pshufd		xmm1, xmm2, R_SHUFFLE_PS( 1, 0, 1, 0 )
		addps		xmm2, xmm1
		
		rsqrtps		xmm1, xmm2
		mulps		xmm2, xmm1
		mulps		xmm2, xmm1
		subps		xmm2, SIMD_SP_rsqrt_c0
		mulps		xmm1, SIMD_SP_rsqrt_c1
		mulps		xmm2, xmm1
		
		mulps		xmm0, xmm2
		addps		xmm0, SIMD_SSE2_float_one
		mulps		xmm0, SIMD_SSE2_float_descale
		addps		xmm0, SIMD_SSE2_float_half
		maxps		xmm0, SIMD_SSE2_float_zero
		minps		xmm0, SIMD_SSE2_float_255
		cvttps2dq	xmm0, xmm0
		psubd		xmm0, [edi]
		pand		xmm0, SIMD_SSE2_dword_maskWords
		pmullw		xmm0, xmm0
		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 2, 3, 0, 1 )
		paddd		xmm0, xmm1
		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 1, 0, 1, 0 )
		paddd		xmm0, xmm1
		movd		result, xmm0
	}
	return result;
#else // not ( _MSC_VER && _M_IX86 )
	// portable scalar implementation of the steps above
	
	// map 0..255 to -1..1 and accumulate the squared length
	float n[3];
	float lengthSqr = 0.0f;
	for( int i = 0; i < 3; i++ )
	{
		n[i] = ( float )vector[i] * ( 2.0f / 255.0f ) - 1.0f;
		lengthSqr += n[i] * n[i];
	}
	
	// integer inputs never map to exactly zero, the guard only protects the division
	const float invLength = ( lengthSqr > 0.0f ) ? 1.0f / std::sqrt( lengthSqr ) : 0.0f;
	
	int result = 0;
	for( int i = 0; i < 3; i++ )
	{
		// back to 0..255, rounded to nearest by adding 0.5 and truncating
		float f = ( n[i] * invLength + 1.0f ) * ( 255.0f / 2.0f ) + 0.5f;
		if( f < 0.0f )
		{
			f = 0.0f;
		}
		if( f > 255.0f )
		{
			f = 255.0f;
		}
		const int delta = ( int )f - normalized[i];
		result += delta * delta;
	}
	return result;
#endif // _MSC_VER && _M_IX86
}

/*
========================
NormalDistanceDXT5

Same as the DXT1 variant but for normals whose three components are stored in components 1, 2
and 3 of the inputs. Components 1..3 of vector are renormalized, converted back to 0..255 and
compared with components 1..3 of normalized. Component 0 of both inputs is ignored.

On 32 bit x86 MSVC builds this is done with SSE2 inline assembly, which loads both inputs as
four ints. All other builds use the scalar code below, which follows the same steps but uses
an exact square root instead of the refined rsqrtps estimate. These builds used to return 0,
which made every candidate look like a perfect match.

params:	vector		- 4 ints, candidate normal, components expected in 0..255
params:	normalized	- 4 ints, reference normal, components expected in 0..255
return: sum of squared per component differences
========================
*/
int NormalDistanceDXT5( const int* vector, const int* normalized )
{
#if defined(_MSC_VER) && defined(_M_IX86)
	int result;
	__asm
	{
		mov			esi, vector
		mov			edi, normalized
#if 0	// object-space
		pshufd		xmm0, [esi], R_SHUFFLE_PS( 0, 1, 3, 2 )
#else
		pshufd		xmm0, [esi], R_SHUFFLE_PS( 1, 2, 3, 0 )
#endif
		cvtdq2ps	xmm0, xmm0
		mulps		xmm0, SIMD_SSE2_float_scale
		subps		xmm0, SIMD_SSE2_float_one
		pand		xmm0, SIMD_SSE2_dword_maskFirstThree
		movaps		xmm1, xmm0
		mulps		xmm1, xmm1
		pshufd		xmm2, xmm1, R_SHUFFLE_PS( 2, 3, 0, 1 )
		addps		xmm2, xmm1
		pshufd		xmm1, xmm2, R_SHUFFLE_PS( 1, 0, 1, 0 )
		addps		xmm2, xmm1
		
		rsqrtps		xmm1, xmm2
		mulps		xmm2, xmm1
		mulps		xmm2, xmm1
		subps		xmm2, SIMD_SP_rsqrt_c0
		mulps		xmm1, SIMD_SP_rsqrt_c1
		mulps		xmm2, xmm1
		
		mulps		xmm0, xmm2
		addps		xmm0, SIMD_SSE2_float_one
		mulps		xmm0, SIMD_SSE2_float_descale
		addps		xmm0, SIMD_SSE2_float_half
		maxps		xmm0, SIMD_SSE2_float_zero
		minps		xmm0, SIMD_SSE2_float_255
		cvttps2dq	xmm0, xmm0
#if 0	// object-space
		pshufd		xmm3, [edi], R_SHUFFLE_PS( 0, 1, 3, 2 )
#else
		pshufd		xmm3, [edi], R_SHUFFLE_PS( 1, 2, 3, 0 )
#endif
		psubd		xmm0, xmm3
		pand		xmm0, SIMD_SSE2_dword_maskWords
		pmullw		xmm0, xmm0
		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 2, 3, 0, 1 )
		paddd		xmm0, xmm1
		pshufd		xmm1, xmm0, R_SHUFFLE_PS( 1, 0, 1, 0 )
		paddd		xmm0, xmm1
		movd		result, xmm0
	}
	return result;
#else // not ( _MSC_VER && _M_IX86 )
	// portable scalar implementation of the steps above ( the enabled, non object-space variant )
	
	// the normal lives in components 1, 2 and 3; map 0..255 to -1..1 and accumulate the squared length
	float n[3];
	float lengthSqr = 0.0f;
	for( int i = 0; i < 3; i++ )
	{
		n[i] = ( float )vector[i + 1] * ( 2.0f / 255.0f ) - 1.0f;
		lengthSqr += n[i] * n[i];
	}
	
	// integer inputs never map to exactly zero, the guard only protects the division
	const float invLength = ( lengthSqr > 0.0f ) ? 1.0f / std::sqrt( lengthSqr ) : 0.0f;
	
	int result = 0;
	for( int i = 0; i < 3; i++ )
	{
		// back to 0..255, rounded to nearest by adding 0.5 and truncating
		float f = ( n[i] * invLength + 1.0f ) * ( 255.0f / 2.0f ) + 0.5f;
		if( f < 0.0f )
		{
			f = 0.0f;
		}
		if( f > 255.0f )
		{
			f = 255.0f;
		}
		const int delta = ( int )f - normalized[i + 1];
		result += delta * delta;
	}
	return result;
#endif // _MSC_VER && _M_IX86
}

/*
========================
idDxtEncoder::GetSquareNormalsDXT1Error

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	color0		- 4 byte Min color found
paramO:	color1		- 4 byte Max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::GetSquareNormalsDXT1Error( const int* colorBlock, const unsigned short color0, const unsigned short color1, int lastError, unsigned int& colorIndices ) const
{
	byte byteColors[2][4];
	ALIGN16( int colors[4][4] );
	
	ColorFrom565( color0, byteColors[0] );
	ColorFrom565( color1, byteColors[1] );
	
	for( int i = 0; i < 4; i++ )
	{
		colors[0][i] = byteColors[0][i];
		colors[1][i] = byteColors[1][i];
	}
	
	if( color0 > color1 )
	{
		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	}
	else
	{
		assert( color0 == color1 );
		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
		colors[3][0] = 0;
		colors[3][1] = 0;
		colors[3][2] = 0;
	}
	
	int error = 0;
	int tempColorIndices[16];
	for( int i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		
		for( int j = 0; j < 4; j++ )
		{
			unsigned int dist = NormalDistanceDXT1( &colors[j][0], &colorBlock[i * 4] );
			if( dist < minDist )
			{
				minDist = dist;
				tempColorIndices[i] = j;
			}
		}
		// accumulated error
		error += minDist;
		
		if( error > lastError )
		{
			return error;
		}
	}
	
	colorIndices = 0;
	for( int i = 0; i < 16; i++ )
	{
		colorIndices |= ( tempColorIndices[i] << ( unsigned int )( i << 1 ) );
	}
	
	return error;
}

/*
========================
idDxtEncoder::GetMinMaxNormalsDXT1HQ

Uses an exhaustive search to find the two RGB colors that produce the least error when used to
compress the 4x4 block. Also finds the minimum and maximum alpha values.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte Min color found
paramO:	maxColor	- 4 byte Max color found
========================
*/
int idDxtEncoder::GetMinMaxNormalsDXT1HQ( const byte* colorBlock, byte* minColor, byte* maxColor, unsigned int& colorIndices, bool noBlack ) const
{
	int i;
	int i0, i1, i2, j0, j1, j2;
	unsigned short bestMinColor565 = 0;
	unsigned short bestMaxColor565 = 0;
	byte bboxMin[3], bboxMax[3], minAxisDist[3];
	int error, bestError = MAX_TYPE( int );
	unsigned int tempColorIndices;
	ALIGN16( int intColorBlock[16 * 4] );
	
	bboxMin[0] = bboxMin[1] = bboxMin[2] = 128;
	bboxMax[0] = bboxMax[1] = bboxMax[2] = 128;
	
	// get color bbox
	for( i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 0] < bboxMin[0] )
		{
			bboxMin[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] < bboxMin[2] )
		{
			bboxMin[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 0] > bboxMax[0] )
		{
			bboxMax[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] > bboxMax[2] )
		{
			bboxMax[2] = colorBlock[i * 4 + 2];
		}
	}
	
	for( int i = 0; i < 64; i++ )
	{
		intColorBlock[i] = colorBlock[i];
	}
	
	// decrease range for 565 encoding
	bboxMin[0] >>= 3;
	bboxMin[1] >>= 2;
	bboxMin[2] >>= 3;
	bboxMax[0] >>= 3;
	bboxMax[1] >>= 2;
	bboxMax[2] >>= 3;
	
	// get the minimum distance the end points of the line must be apart along each axis
	for( i = 0; i < 3; i++ )
	{
		minAxisDist[i] = 0;
	}
	
	// expand the bounding box
	const int C565_BBOX_EXPAND = 2;
	
	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
	bboxMax[0] = ( bboxMax[0] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[0] + C565_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= ( 255 >> 2 ) - C565_BBOX_EXPAND ) ? ( 255 >> 2 ) : bboxMax[1] + C565_BBOX_EXPAND;
	bboxMax[2] = ( bboxMax[2] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[2] + C565_BBOX_EXPAND;
	
	for( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ )
	{
		for( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- )
		{
			if( abs( i0 - j0 ) < minAxisDist[0] )
			{
				continue;
			}
			
			for( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
			{
				for( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
				{
					if( abs( i1 - j1 ) < minAxisDist[1] )
					{
						continue;
					}
					
					for( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ )
					{
						for( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- )
						{
							if( abs( i2 - j2 ) < minAxisDist[2] )
							{
								continue;
							}
							
							unsigned short minColor565 = ( unsigned short )( ( i0 << 11 ) | ( i1 << 5 ) | ( i2 << 0 ) );
							unsigned short maxColor565 = ( unsigned short )( ( j0 << 11 ) | ( j1 << 5 ) | ( j2 << 0 ) );
							
							if( !noBlack )
							{
								error = GetSquareNormalsDXT1Error( intColorBlock, maxColor565, minColor565, bestError, tempColorIndices );
								if( error < bestError )
								{
									bestError = error;
									bestMinColor565 = minColor565;
									bestMaxColor565 = maxColor565;
									colorIndices = tempColorIndices;
								}
							}
							else
							{
								if( minColor565 <= maxColor565 )
								{
									SwapValues( minColor565, maxColor565 );
								}
							}
							
							error = GetSquareNormalsDXT1Error( intColorBlock, minColor565, maxColor565, bestError, tempColorIndices );
							if( error < bestError )
							{
								bestError = error;
								bestMinColor565 = minColor565;
								bestMaxColor565 = maxColor565;
								colorIndices = tempColorIndices;
							}
						}
					}
				}
			}
		}
	}
	
	ColorFrom565( bestMinColor565, minColor );
	ColorFrom565( bestMaxColor565, maxColor );
	
	return bestError;
}

/*
========================
idDxtEncoder::GetSquareNormalsDXT5Error

params:	normalBlock	- 16 pixel block for which to find normal indexes
paramO:	minNormal	- Min normal found
paramO:	maxNormal	- Max normal found
========================
*/
int idDxtEncoder::GetSquareNormalsDXT5Error( const int* normalBlock, const byte* minNormal, const byte* maxNormal, int lastError, unsigned int& colorIndices, byte* alphaIndices ) const
{
	byte alphas[8];
	byte colors[4][4];
	
	unsigned short smin = ColorTo565( minNormal );
	unsigned short smax = ColorTo565( maxNormal );
	
	ColorFrom565( smax, colors[0] );
	ColorFrom565( smin, colors[1] );
	
	if( smax > smin )
	{
		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	}
	else
	{
		assert( smax == smin );
		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
		colors[3][0] = 0;
		colors[3][1] = 0;
		colors[3][2] = 0;
	}
	
	alphas[0] = maxNormal[3];
	alphas[1] = minNormal[3];
	
	if( maxNormal[3] > minNormal[3] )
	{
		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
	}
	else
	{
		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
		alphas[6] = 0;
		alphas[7] = 255;
	}
	
	int error = 0;
	int tempColorIndices[16];
	int tempAlphaIndices[16];
	for( int i = 0; i < 16; i++ )
	{
		ALIGN16( int normal[4] );
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		
		for( int j = 0; j < 4; j++ )
		{
			normal[0] = colors[j][0];
			normal[1] = colors[j][1];
			normal[2] = colors[j][2];
			
			for( int k = 0; k < 8; k++ )
			{
				normal[3] = alphas[k];
				unsigned int dist = NormalDistanceDXT5( normal, &normalBlock[i * 4] );
				if( dist < minDist )
				{
					minDist = dist;
					tempColorIndices[i] = j;
					tempAlphaIndices[i] = k;
				}
			}
		}
		error += minDist;
		
		if( error >= lastError )
		{
			return error;
		}
	}
	
	alphaIndices[0] = byte( ( tempAlphaIndices[ 0] >> 0 ) | ( tempAlphaIndices[ 1] << 3 ) | ( tempAlphaIndices[ 2] << 6 ) );
	alphaIndices[1] = byte( ( tempAlphaIndices[ 2] >> 2 ) | ( tempAlphaIndices[ 3] << 1 ) | ( tempAlphaIndices[ 4] << 4 ) | ( tempAlphaIndices[ 5] << 7 ) );
	alphaIndices[2] = byte( ( tempAlphaIndices[ 5] >> 1 ) | ( tempAlphaIndices[ 6] << 2 ) | ( tempAlphaIndices[ 7] << 5 ) );
	
	alphaIndices[3] = byte( ( tempAlphaIndices[ 8] >> 0 ) | ( tempAlphaIndices[ 9] << 3 ) | ( tempAlphaIndices[10] << 6 ) );
	alphaIndices[4] = byte( ( tempAlphaIndices[10] >> 2 ) | ( tempAlphaIndices[11] << 1 ) | ( tempAlphaIndices[12] << 4 ) | ( tempAlphaIndices[13] << 7 ) );
	alphaIndices[5] = byte( ( tempAlphaIndices[13] >> 1 ) | ( tempAlphaIndices[14] << 2 ) | ( tempAlphaIndices[15] << 5 ) );
	
	colorIndices = 0;
	for( int i = 0; i < 16; i++ )
	{
		colorIndices |= ( tempColorIndices[i] << ( unsigned int )( i << 1 ) );
	}
	
	return error;
}

/*
========================
idDxtEncoder::GetMinMaxNormalsDXT5HQ

Uses an exhaustive search to find the two RGB colors that produce the least error when used to
compress the 4x4 block. Also finds the minimum and maximum alpha values.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte Min color found
paramO:	maxColor	- 4 byte Max color found
========================
*/
int idDxtEncoder::GetMinMaxNormalsDXT5HQ( const byte* colorBlock, byte* minColor, byte* maxColor, unsigned int& colorIndices, byte* alphaIndices ) const
{
	int i;
	int i0, i1, i3, j0, j1, j3;
	byte bboxMin[4], bboxMax[4], minAxisDist[4];
	byte tmin[4], tmax[4];
	int error, bestError = MAX_TYPE( int );
	unsigned int tempColorIndices;
	byte tempAlphaIndices[6];
	ALIGN16( int intColorBlock[16 * 4] );
	
	bboxMin[0] = bboxMin[1] = bboxMin[2] = bboxMin[3] = 255;
	bboxMax[0] = bboxMax[1] = bboxMax[2] = bboxMax[3] = 0;
	
	// get color bbox
	for( i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 0] < bboxMin[0] )
		{
			bboxMin[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] < bboxMin[2] )
		{
			bboxMin[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 3] < bboxMin[3] )
		{
			bboxMin[3] = colorBlock[i * 4 + 3];
		}
		if( colorBlock[i * 4 + 0] > bboxMax[0] )
		{
			bboxMax[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] > bboxMax[2] )
		{
			bboxMax[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 3] > bboxMax[3] )
		{
			bboxMax[3] = colorBlock[i * 4 + 3];
		}
	}
	
	for( int i = 0; i < 64; i++ )
	{
		intColorBlock[i] = colorBlock[i];
	}
	
	// decrease range for 565 encoding
	bboxMin[0] >>= 3;
	bboxMin[1] >>= 2;
	bboxMax[0] >>= 3;
	bboxMax[1] >>= 2;
	
	// get the minimum distance the end points of the line must be apart along each axis
	for( i = 0; i < 4; i++ )
	{
		minAxisDist[i] = 0;
	}
	
	// expand the bounding box
	const int C565_BBOX_EXPAND = 2;
	const int ALPHA_BBOX_EXPAND = 32;
	
	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
	bboxMin[3] = ( bboxMin[3] <= ALPHA_BBOX_EXPAND ) ? 0 : bboxMin[3] - ALPHA_BBOX_EXPAND;
	bboxMax[0] = ( bboxMax[0] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[0] + C565_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= ( 255 >> 2 ) - C565_BBOX_EXPAND ) ? ( 255 >> 2 ) : bboxMax[1] + C565_BBOX_EXPAND;
	bboxMax[3] = ( bboxMax[3] >= ( 255 ) - ALPHA_BBOX_EXPAND ) ? ( 255 ) : bboxMax[3] + ALPHA_BBOX_EXPAND;
	
	for( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ )
	{
		for( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- )
		{
			if( abs( i0 - j0 ) < minAxisDist[0] )
			{
				continue;
			}
			
			for( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
			{
				for( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
				{
					if( abs( i1 - j1 ) < minAxisDist[1] )
					{
						continue;
					}
					
					tmin[0] = ( byte )j0 << 3;
					tmin[1] = ( byte )j1 << 2;
					tmin[2] = 0;
					
					tmax[0] = ( byte )i0 << 3;
					tmax[1] = ( byte )i1 << 2;
					tmax[2] = 0;
					
					for( i3 = bboxMin[3]; i3 <= bboxMax[3]; i3++ )
					{
						for( j3 = bboxMax[3]; j3 >= bboxMin[3]; j3-- )
						{
							if( abs( i3 - j3 ) < minAxisDist[3] )
							{
								continue;
							}
							
							tmin[3] = ( byte )j3;
							tmax[3] = ( byte )i3;
							
							error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
							if( error < bestError )
							{
								bestError = error;
								memcpy( minColor, tmin, 4 );
								memcpy( maxColor, tmax, 4 );
								colorIndices = tempColorIndices;
								memcpy( alphaIndices, tempAlphaIndices, 6 );
							}
							
							tmin[3] = ( byte )i3;
							tmax[3] = ( byte )j3;
							
							error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
							if( error < bestError )
							{
								bestError = error;
								memcpy( minColor, tmin, 4 );
								memcpy( maxColor, tmax, 4 );
								colorIndices = tempColorIndices;
								memcpy( alphaIndices, tempAlphaIndices, 6 );
							}
						}
					}
				}
			}
		}
	}
	
	return bestError;
}

/*
========================
idDxtEncoder::GetMinMaxNormalsDXT5HQFast

Two stage end point search for a 4x4 block:
  1. the 565 end points are searched exhaustively inside the expanded bounding box of the
     block and scored with GetSquareNormalsDXT1Error
  2. with those end points fixed, every pair of alpha end points in [0, 255] is scored
     with GetSquareNormalsDXT5Error

The error of the first stage is discarded; only the second stage decides the result.

params:	colorBlock		- 4*4 input tile, 4 bytes per pixel
paramO:	minColor		- 4 byte end point of the best candidate, alpha in byte 3
paramO:	maxColor		- 4 byte end point of the best candidate, alpha in byte 3
paramO:	colorIndices	- packed color indices of the best candidate
paramO:	alphaIndices	- 6 bytes of packed alpha indices of the best candidate
return: error of the best candidate of the second stage
========================
*/
int idDxtEncoder::GetMinMaxNormalsDXT5HQFast( const byte* colorBlock, byte* minColor, byte* maxColor, unsigned int& colorIndices, byte* alphaIndices ) const
{
	int i0, i1, i2, i3, j0, j1, j2, j3;
	byte bboxMin[4], bboxMax[4], minAxisDist[4];
	byte tmin[4], tmax[4];
	int error, bestError = MAX_TYPE( int );
	unsigned int tempColorIndices;
	byte tempAlphaIndices[6];
	ALIGN16( int intColorBlock[16 * 4] );
	
	bboxMin[0] = bboxMin[1] = bboxMin[2] = bboxMin[3] = 255;
	bboxMax[0] = bboxMax[1] = bboxMax[2] = bboxMax[3] = 0;
	
	// get color bbox
	for( int i = 0; i < 16; i++ )
	{
		if( colorBlock[i * 4 + 0] < bboxMin[0] )
		{
			bboxMin[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] < bboxMin[1] )
		{
			bboxMin[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] < bboxMin[2] )
		{
			bboxMin[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 3] < bboxMin[3] )
		{
			bboxMin[3] = colorBlock[i * 4 + 3];
		}
		if( colorBlock[i * 4 + 0] > bboxMax[0] )
		{
			bboxMax[0] = colorBlock[i * 4 + 0];
		}
		if( colorBlock[i * 4 + 1] > bboxMax[1] )
		{
			bboxMax[1] = colorBlock[i * 4 + 1];
		}
		if( colorBlock[i * 4 + 2] > bboxMax[2] )
		{
			bboxMax[2] = colorBlock[i * 4 + 2];
		}
		if( colorBlock[i * 4 + 3] > bboxMax[3] )
		{
			bboxMax[3] = colorBlock[i * 4 + 3];
		}
	}
	
	// the error functions work on ints
	for( int i = 0; i < 64; i++ )
	{
		intColorBlock[i] = colorBlock[i];
	}
	
	// decrease range for 565 encoding
	bboxMin[0] >>= 3;
	bboxMin[1] >>= 2;
	bboxMin[2] >>= 3;
	bboxMax[0] >>= 3;
	bboxMax[1] >>= 2;
	bboxMax[2] >>= 3;
	
	// the alpha search always covers the full range, whatever the block contains
	bboxMin[3] = 0;
	bboxMax[3] = 255;
	
	// get the minimum distance the end points of the line must be apart along each axis
	for( int i = 0; i < 4; i++ )
	{
		minAxisDist[i] = 0;
	}
	
	// expand the bounding box
	const int C565_BBOX_EXPAND = 1;
	const int ALPHA_BBOX_EXPAND = 128;
	
#if 0 // object-space
	bboxMin[0] = ( bboxMin[0] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[0] - C565_BBOX_EXPAND;
	bboxMax[0] = ( bboxMax[0] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[0] + C565_BBOX_EXPAND;
	bboxMin[2] = 0;
	bboxMax[2] = 0;
#else
	bboxMin[0] = 0;
	bboxMax[0] = 0;
	bboxMin[2] = ( bboxMin[2] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[2] - C565_BBOX_EXPAND;
	// channel 2 was reduced to 5 bits above, so its upper limit is ( 255 >> 3 ); clamping
	// against the 6 bit limit would let the search reach 32, which spills into the lowest
	// bit of channel 1 when the 565 value is assembled below
	bboxMax[2] = ( bboxMax[2] >= ( 255 >> 3 ) - C565_BBOX_EXPAND ) ? ( 255 >> 3 ) : bboxMax[2] + C565_BBOX_EXPAND;
#endif
	
	bboxMin[1] = ( bboxMin[1] <= C565_BBOX_EXPAND ) ? 0 : bboxMin[1] - C565_BBOX_EXPAND;
	bboxMax[1] = ( bboxMax[1] >= ( 255 >> 2 ) - C565_BBOX_EXPAND ) ? ( 255 >> 2 ) : bboxMax[1] + C565_BBOX_EXPAND;
	
	// with the alpha box forced to [0, 255] above these two leave it unchanged
	bboxMin[3] = ( bboxMin[3] <= ALPHA_BBOX_EXPAND ) ? 0 : bboxMin[3] - ALPHA_BBOX_EXPAND;
	bboxMax[3] = ( bboxMax[3] >= ( 255 ) - ALPHA_BBOX_EXPAND ) ? ( 255 ) : bboxMax[3] + ALPHA_BBOX_EXPAND;
	
	// stage 1: find the best pair of 565 end points
	for( i0 = bboxMin[0]; i0 <= bboxMax[0]; i0++ )
	{
		for( j0 = bboxMax[0]; j0 >= bboxMin[0]; j0-- )
		{
			if( abs( i0 - j0 ) < minAxisDist[0] )
			{
				continue;
			}
			
			for( i1 = bboxMin[1]; i1 <= bboxMax[1]; i1++ )
			{
				for( j1 = bboxMax[1]; j1 >= bboxMin[1]; j1-- )
				{
					if( abs( i1 - j1 ) < minAxisDist[1] )
					{
						continue;
					}
					
					for( i2 = bboxMin[2]; i2 <= bboxMax[2]; i2++ )
					{
						for( j2 = bboxMax[2]; j2 >= bboxMin[2]; j2-- )
						{
							if( abs( i2 - j2 ) < minAxisDist[2] )
							{
								continue;
							}
							
							unsigned short minColor565 = ( unsigned short )( ( i0 << 11 ) | ( i1 << 5 ) | i2 );
							unsigned short maxColor565 = ( unsigned short )( ( j0 << 11 ) | ( j1 << 5 ) | j2 );
							
							// the larger 565 value is always passed as the first end point
							if( minColor565 > maxColor565 )
							{
								SwapValues( minColor565, maxColor565 );
							}
							
							error = GetSquareNormalsDXT1Error( intColorBlock, maxColor565, minColor565, bestError, tempColorIndices );
							if( error < bestError )
							{
								bestError = error;
								ColorFrom565( minColor565, minColor );
								ColorFrom565( maxColor565, maxColor );
								colorIndices = tempColorIndices;
							}
						}
					}
				}
			}
		}
	}
	
	// stage 2: keep the end points found above and search the alpha end points from scratch
	bestError = MAX_TYPE( int );
	
	memcpy( tmin, minColor, 4 );
	memcpy( tmax, maxColor, 4 );
	
	for( i3 = bboxMin[3]; i3 <= bboxMax[3]; i3++ )
	{
		for( j3 = bboxMax[3]; j3 >= bboxMin[3]; j3-- )
		{
			if( abs( i3 - j3 ) < minAxisDist[3] )
			{
				continue;
			}
			
			tmin[3] = ( byte )j3;
			tmax[3] = ( byte )i3;
			
			error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
			if( error < bestError )
			{
				bestError = error;
				memcpy( minColor, tmin, 4 );
				memcpy( maxColor, tmax, 4 );
				colorIndices = tempColorIndices;
				memcpy( alphaIndices, tempAlphaIndices, 6 );
			}
			
			// same pair of alpha values, assigned to the end points the other way around
			tmin[3] = ( byte )i3;
			tmax[3] = ( byte )j3;
			
			error = GetSquareNormalsDXT5Error( intColorBlock, tmin, tmax, bestError, tempColorIndices, tempAlphaIndices );
			if( error < bestError )
			{
				bestError = error;
				memcpy( minColor, tmin, 4 );
				memcpy( maxColor, tmax, 4 );
				colorIndices = tempColorIndices;
				memcpy( alphaIndices, tempAlphaIndices, 6 );
			}
		}
	}
	
	return bestError;
}

/*
========================
idDxtEncoder::FindColorIndices

params:	colorBlock	- 16 pixel block for which find color indexes
paramO:	color0		- Min color found
paramO:	color1		- Max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::FindColorIndices( const byte* colorBlock, const unsigned short color0, const unsigned short color1, unsigned int& result ) const
{
	int i, j;
	unsigned int indexes[16];
	byte colors[4][4];
	
	ColorFrom565( color0, colors[0] );
	ColorFrom565( color1, colors[1] );
	
	if( color0 > color1 )
	{
		colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
		colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
		colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
		colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
		colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
		colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	}
	else
	{
		colors[2][0] = ( 1 * colors[0][0] + 1 * colors[1][0] ) / 2;
		colors[2][1] = ( 1 * colors[0][1] + 1 * colors[1][1] ) / 2;
		colors[2][2] = ( 1 * colors[0][2] + 1 * colors[1][2] ) / 2;
		colors[3][0] = 0;
		colors[3][1] = 0;
		colors[3][2] = 0;
	}
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		for( j = 0; j < 4; j++ )
		{
			unsigned int dist = ColorDistance( &colorBlock[i * 4], &colors[j][0] );
			if( dist < minDist )
			{
				minDist = dist;
				indexes[i] = j;
			}
		}
		// accumulated error
		error += minDist;
	}
	
	result = 0;
	for( i = 0; i < 16; i++ )
	{
		result |= ( indexes[i] << ( unsigned int )( i << 1 ) );
	}
	
	return error;
}

/*
========================
idDxtEncoder::FindAlphaIndices

params:	colorBlock	- 16 pixel block for which find alpha indexes
paramO:	alpha0		- Min alpha found
paramO:	alpha1		- Max alpha found
params:	rindexes	- 6 byte alpha index block
return: error metric for this compression
========================
*/
int idDxtEncoder::FindAlphaIndices( const byte* colorBlock, const int alphaOffset, const byte alpha0, const byte alpha1, byte* rindexes ) const
{
	int i, j;
	unsigned int indexes[16];
	byte alphas[8];
	
	alphas[0] = alpha0;
	alphas[1] = alpha1;
	if( alpha0 > alpha1 )
	{
		alphas[2] = ( 6 * alpha0 + 1 * alpha1 ) / 7;
		alphas[3] = ( 5 * alpha0 + 2 * alpha1 ) / 7;
		alphas[4] = ( 4 * alpha0 + 3 * alpha1 ) / 7;
		alphas[5] = ( 3 * alpha0 + 4 * alpha1 ) / 7;
		alphas[6] = ( 2 * alpha0 + 5 * alpha1 ) / 7;
		alphas[7] = ( 1 * alpha0 + 6 * alpha1 ) / 7;
	}
	else
	{
		alphas[2] = ( 4 * alpha0 + 1 * alpha1 ) / 5;
		alphas[3] = ( 3 * alpha0 + 2 * alpha1 ) / 5;
		alphas[4] = ( 2 * alpha0 + 3 * alpha1 ) / 5;
		alphas[5] = ( 1 * alpha0 + 4 * alpha1 ) / 5;
		alphas[6] = 0;
		alphas[7] = 255;
	}
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		byte a = colorBlock[i * 4 + alphaOffset];
		for( j = 0; j < 8; j++ )
		{
			unsigned int dist = AlphaDistance( a, alphas[j] );
			if( dist < minDist )
			{
				minDist = dist;
				indexes[i] = j;
			}
		}
		error += minDist;
	}
	
	rindexes[0] = byte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	rindexes[1] = byte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	rindexes[2] = byte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	rindexes[3] = byte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	rindexes[4] = byte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	rindexes[5] = byte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
	
	return error;
}

/*
========================
idDxtEncoder::FindCTX1Indices

params:	colorBlock	- 16 pixel block for which find color indexes
paramO:	color0		- Min color found
paramO:	color1		- Max color found
return: 4 byte color index block
========================
*/
int idDxtEncoder::FindCTX1Indices( const byte* colorBlock, const byte* color0, const byte* color1, unsigned int& result ) const
{
	int i, j;
	unsigned int indexes[16];
	byte colors[4][4];
	
	colors[0][0] = color1[0];
	colors[0][1] = color1[1];
	colors[1][0] = color0[0];
	colors[1][1] = color0[1];
	
	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
	
	int error = 0;
	for( i = 0; i < 16; i++ )
	{
		unsigned int minDist = MAX_UNSIGNED_TYPE( int );
		for( j = 0; j < 4; j++ )
		{
			unsigned int dist = CTX1Distance( &colorBlock[i * 4], &colors[j][0] );
			if( dist < minDist )
			{
				minDist = dist;
				indexes[i] = j;
			}
		}
		// accumulated error
		error += minDist;
	}
	
	result = 0;
	for( i = 0; i < 16; i++ )
	{
		result |= ( indexes[i] << ( unsigned int )( i << 1 ) );
	}
	
	return error;
}

/*
========================
idDxtEncoder::CompressImageDXT1HQ

Compresses the image one 4x4 block at a time. The end points of each block come from
GetMinMaxColorsHQ; the block is then emitted with the end point order that gives the
smaller FindColorIndices error.

Nothing is written if a dimension is larger than 4 and not a multiple of 4. Images with a
dimension smaller than 4 are handed to WriteTinyColorDXT1.

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXT1HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte col1[4];
	byte col2[4];
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	const bool badWidth = ( width > 4 ) && ( ( width & 3 ) != 0 );
	const bool badHeight = ( height > 4 ) && ( ( height & 3 ) != 0 );
	if( badWidth || badHeight )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT1( inBuf, width, height );
		return;
	}
	
	for( int y = 0; y < height; y += 4 )
	{
		for( int x = 0; x < width; x += 4 )
		{
			ExtractBlock( inBuf + x * 4, width, block );
			
			GetMinMaxColorsHQ( block, col1, col2, false );
			
			// Write out color data. Try and find minimum error for the two encoding methods.
			const unsigned short scol1 = ColorTo565( col1 );
			const unsigned short scol2 = ColorTo565( col2 );
			
			unsigned int colorIndices1;
			unsigned int colorIndices2;
			const int error1 = FindColorIndices( block, scol1, scol2, colorIndices1 );
			const int error2 = FindColorIndices( block, scol2, scol1, colorIndices2 );
			
			const bool firstOrder = error1 < error2;
			EmitUShort( firstOrder ? scol1 : scol2 );
			EmitUShort( firstOrder ? scol2 : scol1 );
			EmitUInt( firstOrder ? colorIndices1 : colorIndices2 );
			
			//idLib::Printf( "\r%3d%%", ( y * width + x ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
		
		// advance to the next row of blocks: 4 lines of width pixels with 4 bytes each
		inBuf += width * 4 * 4;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressImageDXT5HQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXT5HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte alphaIndices1[6];
	byte alphaIndices2[6];
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	int error1;
	int error2;
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT5( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxColorsHQ( block, col1, col2, true );
			GetMinMaxAlphaHQ( block, 3, col1, col2 );
			
			// Write out alpha data. Try and find minimum error for the two encoding methods.
			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
			
			if( error1 < error2 )
			{
			
				EmitByte( col1[3] );
				EmitByte( col2[3] );
				EmitByte( alphaIndices1[0] );
				EmitByte( alphaIndices1[1] );
				EmitByte( alphaIndices1[2] );
				EmitByte( alphaIndices1[3] );
				EmitByte( alphaIndices1[4] );
				EmitByte( alphaIndices1[5] );
				
			}
			else
			{
			
				EmitByte( col2[3] );
				EmitByte( col1[3] );
				EmitByte( alphaIndices2[0] );
				EmitByte( alphaIndices2[1] );
				EmitByte( alphaIndices2[2] );
				EmitByte( alphaIndices2[3] );
				EmitByte( alphaIndices2[4] );
				EmitByte( alphaIndices2[5] );
			}
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			NV4XHardwareBugFix( col2, col1 );
#endif
			
			// Write out color data. Always take the path with 4 interpolated values.
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol1 );
			EmitUShort( scol2 );
			
			FindColorIndices( block, scol1, scol2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressImageCTX1HQ

Compresses the image one 4x4 block at a time. For each block the first two bytes of both
end points found by GetMinMaxCTX1HQ are emitted ( col2 first ), followed by the indices
from FindCTX1Indices.

Nothing is written if a dimension is larger than 4 and not a multiple of 4. Images with a
dimension smaller than 4 are handed to WriteTinyColorCTX1DXT5A.

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageCTX1HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte col1[4];
	byte col2[4];
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	const bool badWidth = ( width > 4 ) && ( ( width & 3 ) != 0 );
	const bool badHeight = ( height > 4 ) && ( ( height & 3 ) != 0 );
	if( badWidth || badHeight )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorCTX1DXT5A( inBuf, width, height );
		return;
	}
	
	for( int y = 0; y < height; y += 4 )
	{
		for( int x = 0; x < width; x += 4 )
		{
			ExtractBlock( inBuf + x * 4, width, block );
			
			GetMinMaxCTX1HQ( block, col1, col2 );
			
			// end points: two bytes of col2 followed by two bytes of col1
			for( int c = 0; c < 2; c++ )
			{
				EmitByte( col2[c] );
			}
			for( int c = 0; c < 2; c++ )
			{
				EmitByte( col1[c] );
			}
			
			unsigned int colorIndices;
			FindCTX1Indices( block, col1, col2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( y * width + x ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
		
		// advance to the next row of blocks: 4 lines of width pixels with 4 bytes each
		inBuf += width * 4 * 4;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::ScaleYCoCg

Scales bytes 0 and 1 of every pixel about 128 by a factor of 1, 2 or 4. The factor is the
largest one for which the value that is furthest away from 128 still stays in range, and
it is stored in byte 2 of every pixel as ( factor - 1 ) << 3.

params:	colorBlock	- 16 pixel block that is scaled in place, 4 bytes per pixel
========================
*/
void idDxtEncoder::ScaleYCoCg( byte* colorBlock ) const
{
	// largest distance from 128 found in bytes 0 and 1 of the block
	int maxDeviation = 0;
	{
		int lowest[2] = { 255, 255 };
		int highest[2] = { 0, 0 };
		
		for( int pixel = 0; pixel < 16; pixel++ )
		{
			for( int c = 0; c < 2; c++ )
			{
				const int value = colorBlock[pixel * 4 + c];
				if( value < lowest[c] )
				{
					lowest[c] = value;
				}
				if( value > highest[c] )
				{
					highest[c] = value;
				}
			}
		}
		
		for( int c = 0; c < 2; c++ )
		{
			const int below = abs( lowest[c] - 128 );
			const int above = abs( highest[c] - 128 );
			if( below > maxDeviation )
			{
				maxDeviation = below;
			}
			if( above > maxDeviation )
			{
				maxDeviation = above;
			}
		}
	}
	
	const int s0 = 128 / 2 - 1;
	const int s1 = 128 / 4 - 1;
	
	int scale;
	if( maxDeviation <= s1 )
	{
		scale = 4;
	}
	else if( maxDeviation <= s0 )
	{
		scale = 2;
	}
	else
	{
		scale = 1;
	}
	
	const byte scaleCode = byte( ( scale - 1 ) << 3 );
	for( int pixel = 0; pixel < 16; pixel++ )
	{
		byte* texel = &colorBlock[pixel * 4];
		texel[0] = byte( ( texel[0] - 128 ) * scale + 128 );
		texel[1] = byte( ( texel[1] - 128 ) * scale + 128 );
		texel[2] = scaleCode;
	}
}

/*
========================
idDxtEncoder::CompressYCoCgDXT5HQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressYCoCgDXT5HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte alphaIndices1[6];
	byte alphaIndices2[6];
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	int error1;
	int error2;
	
	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT5( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			ScaleYCoCg( block );
			
			GetMinMaxColorsHQ( block, col1, col2, true );
			GetMinMaxAlphaHQ( block, 3, col1, col2 );
			
			// Write out alpha data. Try and find minimum error for the two encoding methods.
			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
			
			if( error1 < error2 )
			{
			
				EmitByte( col1[3] );
				EmitByte( col2[3] );
				EmitByte( alphaIndices1[0] );
				EmitByte( alphaIndices1[1] );
				EmitByte( alphaIndices1[2] );
				EmitByte( alphaIndices1[3] );
				EmitByte( alphaIndices1[4] );
				EmitByte( alphaIndices1[5] );
				
			}
			else
			{
			
				EmitByte( col2[3] );
				EmitByte( col1[3] );
				EmitByte( alphaIndices2[0] );
				EmitByte( alphaIndices2[1] );
				EmitByte( alphaIndices2[2] );
				EmitByte( alphaIndices2[3] );
				EmitByte( alphaIndices2[4] );
				EmitByte( alphaIndices2[5] );
			}
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			NV4XHardwareBugFix( col2, col1 );
#endif
			
			// Write out color data. Always take the path with 4 interpolated values.
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol1 );
			EmitUShort( scol2 );
			
			FindColorIndices( block, scol1, scol2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressYCoCgCTX1DXT5AHQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressYCoCgCTX1DXT5AHQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte alphaIndices1[6];
	byte alphaIndices2[6];
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	int error1;
	int error2;
	
	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorCTX1DXT5A( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxAlphaHQ( block, 3, col1, col2 );
			
			// Write out alpha data. Try and find minimum error for the two encoding methods.
			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
			
			if( error1 < error2 )
			{
			
				EmitByte( col1[3] );
				EmitByte( col2[3] );
				EmitByte( alphaIndices1[0] );
				EmitByte( alphaIndices1[1] );
				EmitByte( alphaIndices1[2] );
				EmitByte( alphaIndices1[3] );
				EmitByte( alphaIndices1[4] );
				EmitByte( alphaIndices1[5] );
				
			}
			else
			{
			
				EmitByte( col2[3] );
				EmitByte( col1[3] );
				EmitByte( alphaIndices2[0] );
				EmitByte( alphaIndices2[1] );
				EmitByte( alphaIndices2[2] );
				EmitByte( alphaIndices2[3] );
				EmitByte( alphaIndices2[4] );
				EmitByte( alphaIndices2[5] );
			}
			
			GetMinMaxCTX1HQ( block, col1, col2 );
			
			EmitByte( col2[0] );
			EmitByte( col2[1] );
			EmitByte( col1[0] );
			EmitByte( col1[1] );
			
			FindCTX1Indices( block, col1, col2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::RotateNormalsDXT1

Treats channels 0 and 1 of every pixel as a 2D vector in the range [-1, 1], tries 32
rotations of the whole block and keeps the one for which GetMinMaxColorsHQ reports the
smallest error. The block is rotated in place and the chosen rotation is stored in
channel 2 of every pixel.

params:	block	- 16 pixel block ( 4 bytes per pixel ), modified in place
========================
*/
void idDxtEncoder::RotateNormalsDXT1( byte* block ) const
{
	byte rotatedBlock[64];
	byte col1[4];
	byte col2[4];
	int bestError = MAX_TYPE( int );
	int bestRotation = 0;
	
	for( int i = 0; i < 32; i += 1 )
	{
		// expand the 5-bit step to 8 bits by replicating its top bits
		int r = ( i << 3 ) | ( i >> 2 );
		float angle = ( r / 255.0f ) * idMath::PI;
		float s = sin( angle );
		float c = cos( angle );
		
		for( int j = 0; j < 16; j++ )
		{
			float x = block[j * 4 + 0] / 255.0f * 2.0f - 1.0f;
			float y = block[j * 4 + 1] / 255.0f * 2.0f - 1.0f;
			float rx = c * x - s * y;
			float ry = s * x + c * y;
			rotatedBlock[j * 4 + 0] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
			rotatedBlock[j * 4 + 1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
			
			// The whole block is handed to GetMinMaxColorsHQ, so the remaining channels must hold
			// defined values as well: channel 2 gets what the final block would store for this
			// rotation and channel 3 is carried over from the input.
			rotatedBlock[j * 4 + 2] = ( byte )r;
			rotatedBlock[j * 4 + 3] = block[j * 4 + 3];
		}
		
		int error = GetMinMaxColorsHQ( rotatedBlock, col1, col2, true );
		if( error < bestError )
		{
			bestError = error;
			bestRotation = r;
		}
	}
	
	// apply the winning rotation to the caller's block
	float angle = ( bestRotation / 255.0f ) * idMath::PI;
	float s = sin( angle );
	float c = cos( angle );
	
	for( int j = 0; j < 16; j++ )
	{
		float x = block[j * 4 + 0] / 255.0f * 2.0f - 1.0f;
		float y = block[j * 4 + 1] / 255.0f * 2.0f - 1.0f;
		float rx = c * x - s * y;
		float ry = s * x + c * y;
		block[j * 4 + 0] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
		block[j * 4 + 1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
		block[j * 4 + 2] = ( byte )bestRotation;
	}
}

/*
========================
idDxtEncoder::CompressNormalMapDXT1HQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXT1HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT1( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			for( int k = 0; k < 16; k++ )
			{
				block[k * 4 + 2] = 0;
			}
			
			GetMinMaxColorsHQ( block, col1, col2, true );
			
			// Write out color data. Always take the path with 4 interpolated values.
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol1 );
			EmitUShort( scol2 );
			
			FindColorIndices( block, scol1, scol2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( j * width + i * 4 ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressNormalMapDXT1RenormalizeHQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXT1RenormalizeHQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT1( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			// clear alpha channel
			for( int k = 0; k < 16; k++ )
			{
				block[k * 4 + 3] = 0;
			}
			
			GetMinMaxNormalsDXT1HQ( block, col1, col2, colorIndices, true );
			
			// Write out color data. Always take the path with 4 interpolated values.
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol1 );
			EmitUShort( scol2 );
			EmitUInt( colorIndices );
			
			////idLib::Printf( "\r%3d%%", ( j * width + i * 4 ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	////idLib::Printf( "\r100%%\n" );
}

// USE_SCALE enables the 1x / 2x / 4x range scaling in BiasScaleNormalY and RotateNormalsDXT5.
#define USE_SCALE		1
// USE_BIAS enables the search for a bias other than 128 in BiasScaleNormalY.
#define USE_BIAS		1

// Counters updated by BiasScaleNormalY.
static int c_blocks;			// blocks processed
static int c_scaled;			// blocks for which a scale other than 1 was chosen
static int c_scaled2x;			// blocks scaled by 2
static int c_scaled4x;			// blocks scaled by 4
static int c_differentBias;		// scaled blocks for which a bias other than 128 was chosen
static int c_biasHelped;		// blocks for which that bias allowed a larger scale than a bias of 128

/*
========================
idDxtEncoder::BiasScaleNormalY

Re-centers channel 1 of the block around the bias that gives the smallest range and
scales it by the largest factor ( 1, 2 or 4 ) that range allows. A bias other than 128
is only kept when the block is actually scaled. Channel 0 of every pixel receives the
bias + 4 and channel 2 receives ( scale - 1 ) << 3. The c_* counters are updated on
every call.

The figures below come from the original comment; presumably they are measurements of
how often each combination was chosen ( TODO confirm ):

	* scale2x = 33%
	* scale4x = 23%
	* bias + scale2x = 30%
	* bias + scale4x = 55%

params:	colorBlock	- 16 pixel block ( 4 bytes per pixel ), modified in place
========================
*/
void idDxtEncoder::BiasScaleNormalY( byte* colorBlock ) const
{
	// range of the values in channel 1
	byte lowest = 255;
	byte highest = 0;
	
	for( int pixel = 0; pixel < 16; pixel++ )
	{
		const byte y = colorBlock[pixel * 4 + 1];
		if( y < lowest )
		{
			lowest = y;
		}
		if( y > highest )
		{
			highest = y;
		}
	}
	
	// start out with the neutral bias and look for one that tightens the range
	int bestBias = 128;
	int bestRange = Max( abs( lowest - bestBias ), abs( highest - bestBias ) );
#if USE_BIAS
	for( int step = 0; step < 32; step++ )
	{
		const int candidate = ( ( step << 3 ) | ( step >> 2 ) ) - 4;
		const int candidateRange = Max( abs( lowest - candidate ), abs( highest - candidate ) );
		if( candidateRange < bestRange )
		{
			bestRange = candidateRange;
			bestBias = candidate;
		}
	}
#endif
	
	const int halfRange = 128 / 2 - 1;
	const int quarterRange = 128 / 4 - 1;
	
	int scale = 1;
#if USE_SCALE
	if( bestRange <= quarterRange )
	{
		scale = 4;
	}
	else if( bestRange <= halfRange )
	{
		scale = 2;
	}
#endif
	
	c_blocks++;
	
	if( scale == 1 )
	{
		// without scaling there is nothing to gain from a different bias
		bestBias = 128;
	}
	else
	{
		c_scaled++;
		if( scale == 2 )
		{
			c_scaled2x++;
		}
		else if( scale == 4 )
		{
			c_scaled4x++;
		}
		
		if( bestBias != 128 )
		{
			c_differentBias++;
			
			// the scale that would have been possible with the neutral bias
			const int neutralRange = Max( abs( lowest - 128 ), abs( highest - 128 ) );
			int neutralScale = 1;
			if( neutralRange <= quarterRange )
			{
				neutralScale = 4;
			}
			else if( neutralRange <= halfRange )
			{
				neutralScale = 2;
			}
			
			if( scale > neutralScale )
			{
				c_biasHelped++;
			}
		}
	}
	
	const byte biasCode = byte( bestBias + 4 );
	const byte scaleCode = byte( ( scale - 1 ) << 3 );
	
	for( int pixel = 0; pixel < 16; pixel++ )
	{
		byte* texel = colorBlock + pixel * 4;
		texel[0] = biasCode;
		texel[1] = byte( ( texel[1] - bestBias ) * scale + 128 );
		texel[2] = scaleCode;
	}
}

/*
========================
idDxtEncoder::RotateNormalsDXT5

Treats channel 3 ( x ) and channel 1 ( y ) of every pixel as a 2D vector in the range
[-1, 1], tries 32 rotations of the whole block and keeps the one with the smallest
combined error reported by GetMinMaxNormalYHQ and GetMinMaxAlphaHQ. With USE_SCALE the
rotated y channel is additionally scaled around 128 by 1, 2 or 4.

The block is rotated in place; channel 0 of every pixel receives the chosen rotation and,
with USE_SCALE, channel 2 receives ( scale - 1 ) << 3.

params:	block	- 16 pixel block ( 4 bytes per pixel ), modified in place
========================
*/
void idDxtEncoder::RotateNormalsDXT5( byte* block ) const
{
	byte rotatedBlock[64];
	byte col1[4];
	byte col2[4];
	int bestError = MAX_TYPE( int );
	int bestRotation = 0;
	int bestScale = 1;
	
	for( int i = 0; i < 32; i += 1 )
	{
		// expand the 5-bit step to 8 bits by replicating its top bits
		int r = ( i << 3 ) | ( i >> 2 );
		float angle = ( r / 255.0f ) * idMath::PI;
		float s = sin( angle );
		float c = cos( angle );
		
		// declared outside of the USE_SCALE section because it is used below either way
		int scale = 1;
		
		for( int j = 0; j < 16; j++ )
		{
			float x = block[j * 4 + 3] / 255.0f * 2.0f - 1.0f;
			float y = block[j * 4 + 1] / 255.0f * 2.0f - 1.0f;
			float rx = c * x - s * y;
			float ry = s * x + c * y;
			rotatedBlock[j * 4 + 3] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
			rotatedBlock[j * 4 + 1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
			
			// The whole block is handed to the error helpers, so the remaining channels must hold
			// defined values as well; they mirror what the final block stores.
			rotatedBlock[j * 4 + 0] = ( byte )r;
			rotatedBlock[j * 4 + 2] = block[j * 4 + 2];
		}
		
#if USE_SCALE
		byte minColor = 255;
		byte maxColor = 0;
		
		for( int j = 0; j < 16; j++ )
		{
			if( rotatedBlock[j * 4 + 1] < minColor )
			{
				minColor = rotatedBlock[j * 4 + 1];
			}
			if( rotatedBlock[j * 4 + 1] > maxColor )
			{
				maxColor = rotatedBlock[j * 4 + 1];
			}
		}
		
		const int s0 = 128 / 2 - 1;
		const int s1 = 128 / 4 - 1;
		
		// 4x when y lies within a quarter of the range, 2x within half of the range, else 1x
		int range = Max( abs( minColor - 128 ), abs( maxColor - 128 ) );
		scale = 1 + ( range <= s0 ) + 2 * ( range <= s1 );
		
		for( int j = 0; j < 16; j++ )
		{
			rotatedBlock[j * 4 + 1] = byte( ( rotatedBlock[j * 4 + 1] - 128 ) * scale + 128 );
			rotatedBlock[j * 4 + 2] = byte( ( scale - 1 ) << 3 );
		}
#endif
		
		int errorY = GetMinMaxNormalYHQ( rotatedBlock, col1, col2, true, scale );
		int errorX = GetMinMaxAlphaHQ( rotatedBlock, 3, col1, col2 );
		int error = errorX + errorY;
		if( error < bestError )
		{
			bestError = error;
			bestRotation = r;
			bestScale = scale;
		}
	}
	
	// apply the winning rotation ( and scale ) to the caller's block
	float angle = ( bestRotation / 255.0f ) * idMath::PI;
	float s = sin( angle );
	float c = cos( angle );
	
	for( int j = 0; j < 16; j++ )
	{
		float x = block[j * 4 + 3] / 255.0f * 2.0f - 1.0f;
		float y = block[j * 4 + 1] / 255.0f * 2.0f - 1.0f;
		float rx = c * x - s * y;
		float ry = s * x + c * y;
		block[j * 4 + 0] = ( byte )bestRotation;
		block[j * 4 + 1] = idMath::Ftob( ( ry + 1.0f ) / 2.0f * 255.0f );
		block[j * 4 + 3] = idMath::Ftob( ( rx + 1.0f ) / 2.0f * 255.0f );
		
#if USE_SCALE
		block[j * 4 + 1] = byte( ( block[j * 4 + 1] - 128 ) * bestScale + 128 );
		block[j * 4 + 2] = byte( ( bestScale - 1 ) << 3 );
#endif
	}
}

/*
========================
idDxtEncoder::CompressNormalMapDXT5HQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXT5HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte alphaIndices1[6];
	byte alphaIndices2[6];
	unsigned int colorIndices;
	byte col1[4];
	byte col2[4];
	int error1;
	int error2;
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT5( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			// swizzle components
			for( int k = 0; k < 16; k++ )
			{
				block[k * 4 + 3] = block[k * 4 + 0];
				block[k * 4 + 0] = 0;
				block[k * 4 + 2] = 0;
			}
			
			//BiasScaleNormalY( block );
			//RotateNormalsDXT5( block );
			
			GetMinMaxNormalYHQ( block, col1, col2, true, 1 );
			GetMinMaxAlphaHQ( block, 3, col1, col2 );
			
			// Write out alpha data. Try and find minimum error for the two encoding methods.
			error1 = FindAlphaIndices( block, 3, col1[3], col2[3], alphaIndices1 );
			error2 = FindAlphaIndices( block, 3, col2[3], col1[3], alphaIndices2 );
			
			if( error1 < error2 )
			{
			
				EmitByte( col1[3] );
				EmitByte( col2[3] );
				EmitByte( alphaIndices1[0] );
				EmitByte( alphaIndices1[1] );
				EmitByte( alphaIndices1[2] );
				EmitByte( alphaIndices1[3] );
				EmitByte( alphaIndices1[4] );
				EmitByte( alphaIndices1[5] );
				
			}
			else
			{
			
				EmitByte( col2[3] );
				EmitByte( col1[3] );
				EmitByte( alphaIndices2[0] );
				EmitByte( alphaIndices2[1] );
				EmitByte( alphaIndices2[2] );
				EmitByte( alphaIndices2[3] );
				EmitByte( alphaIndices2[4] );
				EmitByte( alphaIndices2[5] );
			}
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			NV4XHardwareBugFix( col2, col1 );
#endif
			
			// Write out color data. Always take the path with 4 interpolated values.
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol1 );
			EmitUShort( scol2 );
			
			FindColorIndices( block, scol1, scol2, colorIndices );
			EmitUInt( colorIndices );
			
			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressNormalMapDXT5RenormalizeHQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXT5RenormalizeHQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	unsigned int colorIndices;
	byte alphaIndices[6];
	byte col1[4];
	byte col2[4];
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT5( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			// swizzle components
			for( int k = 0; k < 16; k++ )
			{
#if 0 // object-space
				block[k * 4 + 3] = block[k * 4 + 2];
				block[k * 4 + 2] = 0;
#else
				block[k * 4 + 3] = block[k * 4 + 0];
				block[k * 4 + 0] = 0;
#endif
			}
			
			GetMinMaxNormalsDXT5HQFast( block, col1, col2, colorIndices, alphaIndices );
			
			EmitByte( col2[3] );
			EmitByte( col1[3] );
			EmitByte( alphaIndices[0] );
			EmitByte( alphaIndices[1] );
			EmitByte( alphaIndices[2] );
			EmitByte( alphaIndices[3] );
			EmitByte( alphaIndices[4] );
			EmitByte( alphaIndices[5] );
			
			unsigned short scol1 = ColorTo565( col1 );
			unsigned short scol2 = ColorTo565( col2 );
			
			EmitUShort( scol2 );
			EmitUShort( scol1 );
			EmitUInt( colorIndices );
			
			////idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	////idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::CompressNormalMapDXN2HQ

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXN2HQ( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	byte alphaIndices1[6];
	byte alphaIndices2[6];
	byte col1[4];
	byte col2[4];
	int error1;
	int error2;
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		WriteTinyColorDXT5( inBuf, width, height );
		return;
	}
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			for( int k = 0; k < 2; k++ )
			{
				GetMinMaxAlphaHQ( block, k, col1, col2 );
				
				// Write out alpha data. Try and find minimum error for the two encoding methods.
				error1 = FindAlphaIndices( block, k, col1[k], col2[k], alphaIndices1 );
				error2 = FindAlphaIndices( block, k, col2[k], col1[k], alphaIndices2 );
				
				if( error1 < error2 )
				{
				
					EmitByte( col1[k] );
					EmitByte( col2[k] );
					EmitByte( alphaIndices1[0] );
					EmitByte( alphaIndices1[1] );
					EmitByte( alphaIndices1[2] );
					EmitByte( alphaIndices1[3] );
					EmitByte( alphaIndices1[4] );
					EmitByte( alphaIndices1[5] );
					
				}
				else
				{
				
					EmitByte( col2[k] );
					EmitByte( col1[k] );
					EmitByte( alphaIndices2[0] );
					EmitByte( alphaIndices2[1] );
					EmitByte( alphaIndices2[2] );
					EmitByte( alphaIndices2[3] );
					EmitByte( alphaIndices2[4] );
					EmitByte( alphaIndices2[5] );
				}
			}
			
			//idLib::Printf( "\r%3d%%", ( j * width + i ) * 100 / ( width * height ) );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
	
	//idLib::Printf( "\r100%%\n" );
}

/*
========================
idDxtEncoder::GetMinMaxBBox

Finds the per channel minimum and maximum over the 16 pixels of the block, i.e. the
extents of the bounding box of the colors in RGB space plus the minimum and maximum
alpha values.

params:	colorBlock	- 4*4 input tile, 4 bytes per pixel
paramO:	minColor	- 4 byte Min color found
paramO:	maxColor	- 4 byte Max color found
========================
*/
ID_INLINE void idDxtEncoder::GetMinMaxBBox( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	// start with an inverted box so that the first pixel replaces both extents
	for( int channel = 0; channel < 4; channel++ )
	{
		minColor[channel] = 255;
		maxColor[channel] = 0;
	}
	
	const byte* texel = colorBlock;
	for( int pixel = 0; pixel < 16; pixel++, texel += 4 )
	{
		for( int channel = 0; channel < 4; channel++ )
		{
			const byte value = texel[channel];
			if( value < minColor[channel] )
			{
				minColor[channel] = value;
			}
			if( value > maxColor[channel] )
			{
				maxColor[channel] = value;
			}
		}
	}
}

/*
========================
idDxtEncoder::InsetColorsBBox

Moves both ends of the bounding box towards each other by ( range >> shift ) per channel,
using INSET_COLOR_SHIFT for channels 0-2 and INSET_ALPHA_SHIFT for channel 3. The results
are clamped to [0, 255].

paramO:	minColor	- 4 byte Min color, raised by the inset
paramO:	maxColor	- 4 byte Max color, lowered by the inset
========================
*/
ID_INLINE void idDxtEncoder::InsetColorsBBox( byte* minColor, byte* maxColor ) const
{
	const int shifts[4] = { INSET_COLOR_SHIFT, INSET_COLOR_SHIFT, INSET_COLOR_SHIFT, INSET_ALPHA_SHIFT };
	byte inset[4];
	
	// all insets are derived from the original extents before anything is modified
	for( int channel = 0; channel < 4; channel++ )
	{
		inset[channel] = ( maxColor[channel] - minColor[channel] ) >> shifts[channel];
	}
	
	for( int channel = 0; channel < 4; channel++ )
	{
		const int raised = minColor[channel] + inset[channel];
		minColor[channel] = ( raised <= 255 ) ? raised : 255;
	}
	
	for( int channel = 0; channel < 4; channel++ )
	{
		const int lowered = maxColor[channel] - inset[channel];
		maxColor[channel] = ( lowered >= 0 ) ? lowered : 0;
	}
}

/*
========================
idDxtEncoder::SelectColorsDiagonal
========================
*/
void idDxtEncoder::SelectColorsDiagonal( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{

	byte mid0 = byte( ( ( int ) minColor[0] + maxColor[0] + 1 ) >> 1 );
	byte mid1 = byte( ( ( int ) minColor[1] + maxColor[1] + 1 ) >> 1 );
	byte mid2 = byte( ( ( int ) minColor[2] + maxColor[2] + 1 ) >> 1 );
	
#if 0
	
	// using the covariance is the best way to select the diagonal
	int side0 = 0;
	int side1 = 0;
	for( int i = 0; i < 16; i++ )
	{
		int b0 = colorBlock[i * 4 + 0] - mid0;
		int b1 = colorBlock[i * 4 + 1] - mid1;
		int b2 = colorBlock[i * 4 + 2] - mid2;
		side0 += ( b0 * b1 );
		side1 += ( b1 * b2 );
	}
	byte mask0 = -( side0 < 0 );
	byte mask1 = -( side1 < 0 );
	
#else
	
	// calculating the covariance of just the sign bits is much faster and gives almost the same result
	int side0 = 0;
	int side1 = 0;
	for( int i = 0; i < 16; i++ )
	{
		byte b0 = colorBlock[i * 4 + 0] >= mid0;
		byte b1 = colorBlock[i * 4 + 1] >= mid1;
		byte b2 = colorBlock[i * 4 + 2] >= mid2;
		side0 += ( b0 ^ b1 );
		side1 += ( b1 ^ b2 );
	}
	byte mask0 = -( side0 > 8 );
	byte mask1 = -( side1 > 8 );
	
#endif
	
	byte c0 = minColor[0];
	byte c1 = maxColor[0];
	byte c2 = minColor[2];
	byte c3 = maxColor[2];
	
	c0 ^= c1;
	mask0 &= c0;
	c1 ^= mask0;
	c0 ^= c1;
	
	c2 ^= c3;
	mask1 &= c2;
	c3 ^= mask1;
	c2 ^= c3;
	
	minColor[0] = c0;
	maxColor[0] = c1;
	minColor[2] = c2;
	maxColor[2] = c3;
	
	if( ColorTo565( minColor ) > ColorTo565( maxColor ) )
	{
		SwapValues( minColor[0], maxColor[0] );
		SwapValues( minColor[1], maxColor[1] );
		SwapValues( minColor[2], maxColor[2] );
	}
}

/*
========================
idDxtEncoder::EmitColorIndices

Finds for each of the 16 pixels the closest of the 4 palette colors and emits the
resulting 2-bit indices as a single 32-bit value with pixel 0 in the lowest bits.

The palette is built from both end points quantized to 5:6:5 ( with the top bits
replicated into the low bits ) and the two colors interpolated at 1/3 and 2/3:

	index 0 = maxColor
	index 1 = minColor
	index 2 = ( 2 * maxColor + 1 * minColor ) / 3
	index 3 = ( 1 * maxColor + 2 * minColor ) / 3

params:	colorBlock	- 16 pixel block for which to find color indexes
params:	minColor	- Min color found
params:	maxColor	- Max color found
========================
*/
void idDxtEncoder::EmitColorIndices( const byte* colorBlock, const byte* minColor, const byte* maxColor )
{
	ALIGN16( uint16 colors[4][4] );
	unsigned int result = 0;
	
	colors[0][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
	colors[0][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
	colors[0][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 5 );
	colors[0][3] = 0;
	colors[1][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
	colors[1][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
	colors[1][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 5 );
	colors[1][3] = 0;
	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
	colors[2][2] = ( 2 * colors[0][2] + 1 * colors[1][2] ) / 3;
	colors[2][3] = 0;
	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
	colors[3][2] = ( 1 * colors[0][2] + 2 * colors[1][2] ) / 3;
	colors[3][3] = 0;
	
	// uses sum of absolute differences instead of squared distance to find the best match
	for( int i = 15; i >= 0; i-- )
	{
		const int c0 = colorBlock[i * 4 + 0];
		const int c1 = colorBlock[i * 4 + 1];
		const int c2 = colorBlock[i * 4 + 2];
		
		const int d0 = abs( colors[0][0] - c0 ) + abs( colors[0][1] - c1 ) + abs( colors[0][2] - c2 );
		const int d1 = abs( colors[1][0] - c0 ) + abs( colors[1][1] - c1 ) + abs( colors[1][2] - c2 );
		const int d2 = abs( colors[2][0] - c0 ) + abs( colors[2][1] - c1 ) + abs( colors[2][2] - c2 );
		const int d3 = abs( colors[3][0] - c0 ) + abs( colors[3][1] - c1 ) + abs( colors[3][2] - c2 );
		
		// branchless selection of the index with the smallest distance
		const unsigned int b0 = d0 > d3;
		const unsigned int b1 = d1 > d2;
		const unsigned int b2 = d0 > d2;
		const unsigned int b3 = d1 > d3;
		const unsigned int b4 = d2 > d3;
		
		const unsigned int x0 = b1 & b2;
		const unsigned int x1 = b0 & b3;
		const unsigned int x2 = b0 & b4;
		
		// unsigned arithmetic so that the index of pixel 15 can be shifted into the top bits
		result |= ( x2 | ( ( x0 | x1 ) << 1 ) ) << ( i << 1 );
	}
	
	EmitUInt( result );
}

/*
========================
idDxtEncoder::EmitColorAlphaIndices

Finds for each of the 16 pixels the best palette entry and emits the resulting 2-bit
indices as a single 32-bit value with pixel 0 in the lowest bits.

The palette is built from both end points quantized to 5:6:5 ( with the top bits
replicated into the low bits ), their midpoint and a transparent entry:

	index 0 = minColor
	index 1 = maxColor
	index 2 = ( minColor + maxColor ) / 2
	index 3 = transparent, used for every pixel with alpha below 128

params:	colorBlock	- 16 pixel block for which to find color indexes
params:	minColor	- Min color found
params:	maxColor	- Max color found
========================
*/
void idDxtEncoder::EmitColorAlphaIndices( const byte* colorBlock, const byte* minColor, const byte* maxColor )
{
	ALIGN16( uint16 colors[4][4] );
	unsigned int result = 0;
	
	colors[0][0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
	colors[0][1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
	colors[0][2] = ( minColor[2] & C565_5_MASK ) | ( minColor[2] >> 5 );
	colors[0][3] = 255;
	colors[1][0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
	colors[1][1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
	colors[1][2] = ( maxColor[2] & C565_5_MASK ) | ( maxColor[2] >> 5 );
	colors[1][3] = 255;
	colors[2][0] = ( colors[0][0] + colors[1][0] ) / 2;
	colors[2][1] = ( colors[0][1] + colors[1][1] ) / 2;
	colors[2][2] = ( colors[0][2] + colors[1][2] ) / 2;
	colors[2][3] = 255;
	colors[3][0] = 0;
	colors[3][1] = 0;
	colors[3][2] = 0;
	colors[3][3] = 0;
	
	// uses sum of absolute differences instead of squared distance to find the best match
	for( int i = 15; i >= 0; i-- )
	{
		const int c0 = colorBlock[i * 4 + 0];
		const int c1 = colorBlock[i * 4 + 1];
		const int c2 = colorBlock[i * 4 + 2];
		const int c3 = colorBlock[i * 4 + 3];
		
		const int d0 = abs( colors[0][0] - c0 ) + abs( colors[0][1] - c1 ) + abs( colors[0][2] - c2 );
		const int d1 = abs( colors[1][0] - c0 ) + abs( colors[1][1] - c1 ) + abs( colors[1][2] - c2 );
		const int d2 = abs( colors[2][0] - c0 ) + abs( colors[2][1] - c1 ) + abs( colors[2][2] - c2 );
		
		unsigned int index;
		if( c3 < 128 )
		{
			// transparent pixel, the color does not matter
			index = 3;
		}
		else if( d2 < d0 && d2 < d1 )
		{
			// the midpoint is closer than both end points
			index = 2;
		}
		else if( d1 < d0 )
		{
			index = 1;
		}
		else
		{
			index = 0;
		}
		
		result |= index << ( i << 1 );
	}
	
	EmitUInt( result );
}

/*
========================
idDxtEncoder::EmitCTX1Indices

params:	colorBlock	- 16 pixel block for which find color indexes
paramO:	minColor	- Min color found
paramO:	maxColor	- Max color found
return: 4 byte color index block
========================
*/
void idDxtEncoder::EmitCTX1Indices( const byte* colorBlock, const byte* minColor, const byte* maxColor )
{
	// Builds a four entry, two channel palette from the end points, assigns a 2 bit
	// palette index to each of the 16 pixels of colorBlock (4 bytes per pixel, only the
	// first two channels are read) and emits the packed indices through EmitUInt.
	ALIGN16( uint16 colors[4][2] );
	unsigned int result = 0;
	
	// entries 0 and 1 are the end points, entries 2 and 3 lie at 1/3 and 2/3
	// of the way from maxColor to minColor
	colors[0][0] = maxColor[0];
	colors[0][1] = maxColor[1];
	colors[1][0] = minColor[0];
	colors[1][1] = minColor[1];
	
	colors[2][0] = ( 2 * colors[0][0] + 1 * colors[1][0] ) / 3;
	colors[2][1] = ( 2 * colors[0][1] + 1 * colors[1][1] ) / 3;
	colors[3][0] = ( 1 * colors[0][0] + 2 * colors[1][0] ) / 3;
	colors[3][1] = ( 1 * colors[0][1] + 2 * colors[1][1] ) / 3;
	
	for( int i = 15; i >= 0; i-- )
	{
		int c0, c1, m, d0, d1, d2, d3;
		
		c0 = colorBlock[i * 4 + 0];
		c1 = colorBlock[i * 4 + 1];
		
		// sum of absolute differences between the pixel and each palette entry
		m = colors[0][0] - c0;
		d0 = abs( m );
		m = colors[1][0] - c0;
		d1 = abs( m );
		m = colors[2][0] - c0;
		d2 = abs( m );
		m = colors[3][0] - c0;
		d3 = abs( m );
		
		m = colors[0][1] - c1;
		d0 += abs( m );
		m = colors[1][1] - c1;
		d1 += abs( m );
		m = colors[2][1] - c1;
		d2 += abs( m );
		m = colors[3][1] - c1;
		d3 += abs( m );
		
		// The comparison results are kept as unsigned int so that the shift below
		// (up to 30 bits on a value of up to 3) never reaches the sign bit of a
		// signed int.
		const unsigned int b0 = d0 > d3;
		const unsigned int b1 = d1 > d2;
		const unsigned int b2 = d0 > d2;
		const unsigned int b3 = d1 > d3;
		const unsigned int b4 = d2 > d3;
		
		// the 2 bit index is derived from the pairwise comparisons without branching:
		// x2 is the low bit, ( x0 | x1 ) is the high bit
		const unsigned int x0 = b1 & b2;
		const unsigned int x1 = b0 & b3;
		const unsigned int x2 = b0 & b4;
		
		// pixel i occupies bits 2*i and 2*i+1 of the result
		result |= ( x2 | ( ( x0 | x1 ) << 1 ) ) << ( i << 1 );
	}
	
	EmitUInt( result );
}

/*
========================
idDxtEncoder::EmitAlphaIndices

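params:	colorBlock	- 16 pixel block for which to find alpha indexes
params:	offset		- byte offset of the channel to encode within each 4 byte pixel
params:	minAlpha	- min alpha of the block (input)
params:	maxAlpha	- max alpha of the block (input, must be >= minAlpha)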
========================
*/
void idDxtEncoder::EmitAlphaIndices( const byte* colorBlock, const int offset, const byte minAlpha, const byte maxAlpha )
{
	// Assigns a 3 bit index to one channel of each of the 16 pixels in colorBlock and
	// emits the 16 indices as six bytes through EmitByte. The channel is selected by
	// 'offset', pixels are 4 bytes apart. Four alternative index selections are kept
	// below; only the first one is compiled.

	assert( maxAlpha >= minAlpha );
	
	const int ALPHA_RANGE = 7;
	
#if 1
	
	// active variant: compare against seven thresholds instead of measuring distances
	byte ab1, ab2, ab3, ab4, ab5, ab6, ab7;
	ALIGN16( byte indexes[16] );
	
	// each threshold is the rounded midpoint between two neighboring values of the
	// eight step ramp from maxAlpha down to minAlpha (the ramp itself is spelled out
	// in the last variant below)
	ab1 = ( 13 * maxAlpha +  1 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab2 = ( 11 * maxAlpha +  3 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab3 = ( 9 * maxAlpha +  5 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab4 = ( 7 * maxAlpha +  7 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab5 = ( 5 * maxAlpha +  9 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab6 = ( 3 * maxAlpha + 11 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	ab7 = ( 1 * maxAlpha + 13 * minAlpha + ALPHA_RANGE ) / ( ALPHA_RANGE * 2 );
	
	colorBlock += offset;
	
	for( int i = 0; i < 16; i++ )
	{
		byte a = colorBlock[i * 4];
		int b1 = ( a >= ab1 );
		int b2 = ( a >= ab2 );
		int b3 = ( a >= ab3 );
		int b4 = ( a >= ab4 );
		int b5 = ( a >= ab5 );
		int b6 = ( a >= ab6 );
		int b7 = ( a >= ab7 );
		// 8 minus the number of thresholds reached, wrapped to 3 bits: all seven
		// reached gives 1, none reached gives 0, everything in between gives 2..7
		int index = ( 8 - b1 - b2 - b3 - b4 - b5 - b6 - b7 ) & 7;
		// swap 0 and 1 so that index 0 selects maxAlpha and index 1 selects minAlpha
		indexes[i] = byte( index ^ ( 2 > index ) );
	}
	
	// pack the 3 bit indexes, eight indexes per three bytes, lowest pixel in the lowest bits
	EmitByte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	EmitByte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	EmitByte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	EmitByte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	EmitByte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	EmitByte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
	
#elif 0
	
	// disabled variant: clamp values near the end points to index 1 / 0 and compute
	// the remaining indexes with a division by the alpha range
	// NOTE(review): divides by delta, which is zero when minAlpha == maxAlpha -
	// check whether the end point tests always catch that case before enabling
	ALIGN16( byte indexes[16] );
	byte delta = maxAlpha - minAlpha;
	byte half = delta >> 1;
	byte bias = delta / ( 2 * ALPHA_RANGE );
	byte bottom = minAlpha + bias;
	byte top = maxAlpha - bias;
	
	colorBlock += offset;
	
	for( int i = 0; i < 16; i++ )
	{
		byte a = colorBlock[i * 4];
		if( a <= bottom )
		{
			indexes[i] = 1;
		}
		else if( a >= top )
		{
			indexes[i] = 0;
		}
		else
		{
			indexes[i] = ( ALPHA_RANGE + 1 ) + ( ( minAlpha - a ) * ALPHA_RANGE - half ) / delta;
		}
	}
	
	EmitByte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	EmitByte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	EmitByte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	EmitByte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	EmitByte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	EmitByte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
	
#elif 0
	
	// disabled variant: same computation as the previous one with the branches
	// replaced by masks; the division is evaluated for every pixel
	// NOTE(review): divides by delta unconditionally, which is zero when
	// minAlpha == maxAlpha
	ALIGN16( byte indexes[16] );
	byte delta = maxAlpha - minAlpha;
	byte half = delta >> 1;
	byte bias = delta / ( 2 * ALPHA_RANGE );
	byte bottom = minAlpha + bias;
	byte top = maxAlpha - bias;
	
	colorBlock += offset;
	
	for( int i = 0; i < 16; i++ )
	{
		byte a = colorBlock[i * 4];
		int index = ( ALPHA_RANGE + 1 ) + ( ( minAlpha - a ) * ALPHA_RANGE - half ) / delta;
		int c0 = a > bottom;
		int c1 = a < top;
		indexes[i] = ( index & -( c0 & c1 ) ) | ( c0 ^ 1 );
	}
	
	EmitByte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	EmitByte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	EmitByte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	EmitByte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	EmitByte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	EmitByte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
	
#else
	
	// disabled variant: build the full eight entry ramp and pick, for every pixel,
	// the first entry with the smallest absolute difference
	ALIGN16( byte indexes[16] );
	ALIGN16( byte alphas[8] );
	
	alphas[0] = maxAlpha;
	alphas[1] = minAlpha;
	alphas[2] = ( 6 * maxAlpha + 1 * minAlpha ) / ALPHA_RANGE;
	alphas[3] = ( 5 * maxAlpha + 2 * minAlpha ) / ALPHA_RANGE;
	alphas[4] = ( 4 * maxAlpha + 3 * minAlpha ) / ALPHA_RANGE;
	alphas[5] = ( 3 * maxAlpha + 4 * minAlpha ) / ALPHA_RANGE;
	alphas[6] = ( 2 * maxAlpha + 5 * minAlpha ) / ALPHA_RANGE;
	alphas[7] = ( 1 * maxAlpha + 6 * minAlpha ) / ALPHA_RANGE;
	
	colorBlock += offset;
	
	for( int i = 0; i < 16; i++ )
	{
		int minDist = INT_MAX;
		byte a = colorBlock[i * 4];
		for( int j = 0; j < 8; j++ )
		{
			int dist = abs( a - alphas[j] );
			if( dist < minDist )
			{
				minDist = dist;
				indexes[i] = j;
			}
		}
	}
	
	EmitByte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	EmitByte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	EmitByte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	EmitByte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	EmitByte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	EmitByte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
	
#endif
}

/*
========================
idDxtEncoder::CompressImageDXT1Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXT1Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, minColor, maxColor );
			//SelectColorsDiagonal( block, minColor, maxColor );
			InsetColorsBBox( minColor, maxColor );
			
			EmitUShort( ColorTo565( maxColor ) );
			EmitUShort( ColorTo565( minColor ) );
			
			EmitColorIndices( block, minColor, maxColor );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressImageDXT1AlphaFast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXT1AlphaFast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, minColor, maxColor );
			byte minAlpha = minColor[3];
			//SelectColorsDiagonal( block, minColor, maxColor );
			InsetColorsBBox( minColor, maxColor );
			
			if( minAlpha >= 128 )
			{
				EmitUShort( ColorTo565( maxColor ) );
				EmitUShort( ColorTo565( minColor ) );
				EmitColorIndices( block, minColor, maxColor );
			}
			else
			{
				EmitUShort( ColorTo565( minColor ) );
				EmitUShort( ColorTo565( maxColor ) );
				EmitColorAlphaIndices( block, minColor, maxColor );
			}
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressImageDXT5Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXT5Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, minColor, maxColor );
			//SelectColorsDiagonal( block, minColor, maxColor );
			InsetColorsBBox( minColor, maxColor );
			
			EmitByte( maxColor[3] );
			EmitByte( minColor[3] );
			
			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			// the colors are already always guaranteed to be sorted properly
#endif
			
			EmitUShort( ColorTo565( maxColor ) );
			EmitUShort( ColorTo565( minColor ) );
			
			EmitColorIndices( block, minColor, maxColor );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::ScaleYCoCg
========================
*/
void idDxtEncoder::ScaleYCoCg( byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	// Scales the first two channels of the bounds and of all 16 pixels around the
	// value 128 by 1, 2 or 4 and stores ( scale - 1 ) << 3 in channel 2 of both bounds.
	
	// largest distance from 128 over the first two channels of both bounds
	int extent = 0;
	for( int ch = 0; ch < 2; ch++ )
	{
		const int lowDist = abs( minColor[ch] - 128 );
		const int highDist = abs( maxColor[ch] - 128 );
		if( lowDist > extent ) extent = lowDist;
		if( highDist > extent ) extent = highDist;
	}
	
	// the tighter the values sit around 128 the larger the scale factor
	int scale = 1;
	if( extent <= 128 / 2 - 1 )
	{
		scale = 2;
	}
	if( extent <= 128 / 4 - 1 )
	{
		scale = 4;
	}
	
	const byte scaleCode = byte( ( scale - 1 ) << 3 );
	
	for( int ch = 0; ch < 2; ch++ )
	{
		minColor[ch] = byte( ( minColor[ch] - 128 ) * scale + 128 );
		maxColor[ch] = byte( ( maxColor[ch] - 128 ) * scale + 128 );
	}
	minColor[2] = scaleCode;
	maxColor[2] = scaleCode;
	
	// apply the same scale to the first two channels of every pixel
	for( int k = 0; k < 16; k++ )
	{
		byte* texel = colorBlock + k * 4;
		texel[0] = byte( ( texel[0] - 128 ) * scale + 128 );
		texel[1] = byte( ( texel[1] - 128 ) * scale + 128 );
	}
}

/*
========================
idDxtEncoder::InsetYCoCgBBox
========================
*/
ID_INLINE void idDxtEncoder::InsetYCoCgBBox( byte* minColor, byte* maxColor ) const
{
	// Pulls the bounds of channels 0, 1 and 3 inwards by a fraction of their range
	// (INSET_COLOR_SHIFT for channels 0 and 1, INSET_ALPHA_SHIFT for channel 3).
	// Channel 2 is neither read nor written by the compiled variant. Four alternative
	// implementations are kept below; only the last one is compiled.

#if 0

	// disabled variant: byte arithmetic with a plain ( range >> shift ) inset
	byte inset[4];
	
	inset[0] = ( maxColor[0] - minColor[0] ) >> INSET_COLOR_SHIFT;
	inset[1] = ( maxColor[1] - minColor[1] ) >> INSET_COLOR_SHIFT;
	inset[3] = ( maxColor[3] - minColor[3] ) >> INSET_ALPHA_SHIFT;
	
	minColor[0] = ( minColor[0] + inset[0] <= 255 ) ? minColor[0] + inset[0] : 255;
	minColor[1] = ( minColor[1] + inset[1] <= 255 ) ? minColor[1] + inset[1] : 255;
	minColor[3] = ( minColor[3] + inset[3] <= 255 ) ? minColor[3] + inset[3] : 255;
	
	maxColor[0] = ( maxColor[0] >= inset[0] ) ? maxColor[0] - inset[0] : 0;
	maxColor[1] = ( maxColor[1] >= inset[1] ) ? maxColor[1] - inset[1] : 0;
	maxColor[3] = ( maxColor[3] >= inset[3] ) ? maxColor[3] - inset[3] : 0;
	
	minColor[0] = ( minColor[0] & C565_5_MASK ) | ( minColor[0] >> 5 );
	minColor[1] = ( minColor[1] & C565_6_MASK ) | ( minColor[1] >> 6 );
	
	maxColor[0] = ( maxColor[0] & C565_5_MASK ) | ( maxColor[0] >> 5 );
	maxColor[1] = ( maxColor[1] & C565_6_MASK ) | ( maxColor[1] >> 6 );
	
#elif 0
	
	// disabled variant: floating point reference, min rounded down and max rounded
	// up to 5 / 6 bit precision before being expanded back to 8 bits
	float inset[4];
	float minf[4];
	float maxf[4];
	
	for( int i = 0; i < 4; i++ )
	{
		minf[i] = minColor[i] / 255.0f;
		maxf[i] = maxColor[i] / 255.0f;
	}
	
	inset[0] = ( maxf[0] - minf[0] ) / 16.0f;
	inset[1] = ( maxf[1] - minf[1] ) / 16.0f;
	inset[2] = ( maxf[2] - minf[2] ) / 16.0f;
	inset[3] = ( maxf[3] - minf[3] ) / 32.0f;
	
	for( int i = 0; i < 4; i++ )
	{
		minf[i] = ( minf[i] + inset[i] <= 1.0f ) ? minf[i] + inset[i] : 1.0f;
		maxf[i] = ( maxf[i] >= inset[i] ) ? maxf[i] - inset[i] : 0;
	}
	
	minColor[0] = ( ( int )floor( minf[0] * 31 ) ) & ( ( 1 << 5 ) - 1 );
	minColor[1] = ( ( int )floor( minf[1] * 63 ) ) & ( ( 1 << 6 ) - 1 );
	
	maxColor[0] = ( ( int )ceil( maxf[0] * 31 ) ) & ( ( 1 << 5 ) - 1 );
	maxColor[1] = ( ( int )ceil( maxf[1] * 63 ) ) & ( ( 1 << 6 ) - 1 );
	
	minColor[0] = ( minColor[0] << 3 ) | ( minColor[0] >> 2 );
	minColor[1] = ( minColor[1] << 2 ) | ( minColor[1] >> 4 );
	
	maxColor[0] = ( maxColor[0] << 3 ) | ( maxColor[0] >> 2 );
	maxColor[1] = ( maxColor[1] << 2 ) | ( maxColor[1] >> 4 );
	
	minColor[3] = ( int )floor( minf[3] * 255.0f );
	maxColor[3] = ( int )ceil( maxf[3] * 255.0f );
	
#elif 0
	
	// disabled variant: fixed point version that also reduces channels 0 and 1 to
	// 5 / 6 bit precision before expanding them back to 8 bits
	int inset[4];
	int mini[4];
	int maxi[4];
	
	inset[0] = ( maxColor[0] - minColor[0] );
	inset[1] = ( maxColor[1] - minColor[1] );
	inset[3] = ( maxColor[3] - minColor[3] );
	
	mini[0] = ( minColor[0] << INSET_COLOR_SHIFT ) + inset[0];
	mini[1] = ( minColor[1] << INSET_COLOR_SHIFT ) + inset[1];
	mini[3] = ( minColor[3] << INSET_ALPHA_SHIFT ) + inset[3];
	
	maxi[0] = ( maxColor[0] << INSET_COLOR_SHIFT ) - inset[0];
	maxi[1] = ( maxColor[1] << INSET_COLOR_SHIFT ) - inset[1];
	maxi[3] = ( maxColor[3] << INSET_ALPHA_SHIFT ) - inset[3];
	
	mini[0] = ( mini[0] - ( ( 1 << ( 3 ) ) - 1 ) ) >> ( INSET_COLOR_SHIFT + 3 );
	mini[1] = ( mini[1] - ( ( 1 << ( 3 ) ) - 1 ) ) >> ( INSET_COLOR_SHIFT + 2 );
	mini[3] = ( mini[3] - ( ( 1 << ( 2 ) ) - 1 ) ) >> ( INSET_ALPHA_SHIFT + 0 );
	
	maxi[0] = ( maxi[0] + ( ( 1 << ( 3 ) ) - 1 ) ) >> ( INSET_COLOR_SHIFT + 3 );
	maxi[1] = ( maxi[1] + ( ( 1 << ( 3 ) ) - 1 ) ) >> ( INSET_COLOR_SHIFT + 2 );
	maxi[3] = ( maxi[3] + ( ( 1 << ( 2 ) ) - 1 ) ) >> ( INSET_ALPHA_SHIFT + 0 );
	
	if( mini[0] < 0 ) mini[0] = 0;
	if( mini[1] < 0 ) mini[1] = 0;
	if( mini[3] < 0 ) mini[3] = 0;
	
	if( maxi[0] > 31 ) maxi[0] = 31;
	if( maxi[1] > 63 ) maxi[1] = 63;
	if( maxi[3] > 255 ) maxi[3] = 255;
	
	minColor[0] = ( mini[0] << 3 ) | ( mini[0] >> 2 );
	minColor[1] = ( mini[1] << 2 ) | ( mini[1] >> 4 );
	minColor[3] = mini[3];
	
	maxColor[0] = ( maxi[0] << 3 ) | ( maxi[0] >> 2 );
	maxColor[1] = ( maxi[1] << 2 ) | ( maxi[1] >> 4 );
	maxColor[3] = maxi[3];
	
#elif 1
	
	// compiled variant: fixed point inset of channels 0, 1 and 3
	int inset[4];
	int mini[4];
	int maxi[4];
	
	// range of the channel minus a bias of ( 1 << ( shift - 1 ) ) - 1; the result
	// can be negative for small ranges
	inset[0] = ( maxColor[0] - minColor[0] ) - ( ( 1 << ( INSET_COLOR_SHIFT - 1 ) ) - 1 );
	inset[1] = ( maxColor[1] - minColor[1] ) - ( ( 1 << ( INSET_COLOR_SHIFT - 1 ) ) - 1 );
	inset[3] = ( maxColor[3] - minColor[3] ) - ( ( 1 << ( INSET_ALPHA_SHIFT - 1 ) ) - 1 );
	
	// min + inset / 2^shift, evaluated with 'shift' fractional bits
	mini[0] = ( ( minColor[0] << INSET_COLOR_SHIFT ) + inset[0] ) >> INSET_COLOR_SHIFT;
	mini[1] = ( ( minColor[1] << INSET_COLOR_SHIFT ) + inset[1] ) >> INSET_COLOR_SHIFT;
	mini[3] = ( ( minColor[3] << INSET_ALPHA_SHIFT ) + inset[3] ) >> INSET_ALPHA_SHIFT;
	
	// max - inset / 2^shift, evaluated with 'shift' fractional bits
	maxi[0] = ( ( maxColor[0] << INSET_COLOR_SHIFT ) - inset[0] ) >> INSET_COLOR_SHIFT;
	maxi[1] = ( ( maxColor[1] << INSET_COLOR_SHIFT ) - inset[1] ) >> INSET_COLOR_SHIFT;
	maxi[3] = ( ( maxColor[3] << INSET_ALPHA_SHIFT ) - inset[3] ) >> INSET_ALPHA_SHIFT;
	
	// a negative inset can push the bounds outside of the byte range
	mini[0] = ( mini[0] >= 0 ) ? mini[0] : 0;
	mini[1] = ( mini[1] >= 0 ) ? mini[1] : 0;
	mini[3] = ( mini[3] >= 0 ) ? mini[3] : 0;
	
	maxi[0] = ( maxi[0] <= 255 ) ? maxi[0] : 255;
	maxi[1] = ( maxi[1] <= 255 ) ? maxi[1] : 255;
	maxi[3] = ( maxi[3] <= 255 ) ? maxi[3] : 255;
	
	// presumably keeps the bits that survive the 565 conversion and replicates the
	// top bits into the low bits - C565_5_MASK / C565_6_MASK are defined outside of
	// this function, verify against their definition
	minColor[0] = byte( ( mini[0] & C565_5_MASK ) | ( mini[0] >> 5 ) );
	minColor[1] = byte( ( mini[1] & C565_6_MASK ) | ( mini[1] >> 6 ) );
	minColor[3] = byte( mini[3] );
	
	maxColor[0] = byte( ( maxi[0] & C565_5_MASK ) | ( maxi[0] >> 5 ) );
	maxColor[1] = byte( ( maxi[1] & C565_6_MASK ) | ( maxi[1] >> 6 ) );
	maxColor[3] = byte( maxi[3] );
	
#endif
}

/*
========================
idDxtEncoder::InsetYCoCgAlpaBBox
========================
*/
ID_INLINE void idDxtEncoder::InsetYCoCgAlpaBBox( byte* minColor, byte* maxColor ) const
{
	// Pulls the bounds of all four channels inwards by a fraction of their range:
	// channels 0..2 use INSET_COLOR_SHIFT, channel 3 uses INSET_ALPHA_SHIFT.
	const int shifts[4] = { INSET_COLOR_SHIFT, INSET_COLOR_SHIFT, INSET_COLOR_SHIFT, INSET_ALPHA_SHIFT };
	
	int lowBound[4];
	int highBound[4];
	
	for( int ch = 0; ch < 4; ch++ )
	{
		const int shift = shifts[ch];
		
		// range of the channel minus a bias of ( 1 << ( shift - 1 ) ) - 1
		const int inset = ( maxColor[ch] - minColor[ch] ) - ( ( 1 << ( shift - 1 ) ) - 1 );
		
		// move both bounds by inset / 2^shift using 'shift' fractional bits
		int low = ( ( minColor[ch] << shift ) + inset ) >> shift;
		int high = ( ( maxColor[ch] << shift ) - inset ) >> shift;
		
		// keep the results inside the byte range
		if( low < 0 )
		{
			low = 0;
		}
		if( high > 255 )
		{
			high = 255;
		}
		
		lowBound[ch] = low;
		highBound[ch] = high;
	}
	
	// the three color channels are masked with the 565 masks and or-ed with their
	// own top bits, channel 3 is stored as is
	minColor[0] = byte( ( lowBound[0] & C565_5_MASK ) | ( lowBound[0] >> 5 ) );
	minColor[1] = byte( ( lowBound[1] & C565_6_MASK ) | ( lowBound[1] >> 6 ) );
	minColor[2] = byte( ( lowBound[2] & C565_5_MASK ) | ( lowBound[2] >> 5 ) );
	minColor[3] = byte( lowBound[3] );
	
	maxColor[0] = byte( ( highBound[0] & C565_5_MASK ) | ( highBound[0] >> 5 ) );
	maxColor[1] = byte( ( highBound[1] & C565_6_MASK ) | ( highBound[1] >> 6 ) );
	maxColor[2] = byte( ( highBound[2] & C565_5_MASK ) | ( highBound[2] >> 5 ) );
	maxColor[3] = byte( highBound[3] );
}

/*
========================
idDxtEncoder::SelectYCoCgDiagonal
========================
*/
void idDxtEncoder::SelectYCoCgDiagonal( const byte* colorBlock, byte* minColor, byte* maxColor ) const
{
	byte side = 0;
	
	byte mid0 = byte( ( ( int ) minColor[0] + maxColor[0] + 1 ) >> 1 );
	byte mid1 = byte( ( ( int ) minColor[1] + maxColor[1] + 1 ) >> 1 );
	
	for( int i = 0; i < 16; i++ )
	{
		byte b0 = colorBlock[i * 4 + 0] >= mid0;
		byte b1 = colorBlock[i * 4 + 1] >= mid1;
		side += ( b0 ^ b1 );
	}
	
	byte mask = -( side > 8 );
	
#if defined NVIDIA_7X_HARDWARE_BUG_FIX
	mask &= -( minColor[0] != maxColor[0] );
#endif
	
	byte c0 = minColor[1];
	byte c1 = maxColor[1];
	
	c0 ^= c1;
	mask &= c0;
	c1 ^= mask;
	c0 ^= c1;
	
	minColor[1] = c0;
	maxColor[1] = c1;
}

/*
========================
idDxtEncoder::CompressYCoCgDXT5Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressYCoCgDXT5Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	//assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, minColor, maxColor );
			ScaleYCoCg( block, minColor, maxColor );
			InsetYCoCgBBox( minColor, maxColor );
			SelectYCoCgDiagonal( block, minColor, maxColor );
			
			EmitByte( maxColor[3] );
			EmitByte( minColor[3] );
			
			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			// the colors are already sorted when selecting the diagonal
#endif
			
			EmitUShort( ColorTo565( maxColor ) );
			EmitUShort( ColorTo565( minColor ) );
			
			EmitColorIndices( block, minColor, maxColor );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressYCoCgAlphaDXT5Fast

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressYCoCgAlphaDXT5Fast( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			// scale down the chroma of texels that are close to gray with low luminance
			for( int k = 0; k < 16; k++ )
			{
				if( abs( block[k * 4 + 0] - 132 ) <= 8 &&
						abs( block[k * 4 + 2] - 132 ) <= 8 &&
						block[k * 4 + 3] < 96 )
				{
					block[k * 4 + 0] = ( block[k * 4 + 0] - 132 ) / 2 + 132;
					block[k * 4 + 2] = ( block[k * 4 + 2] - 132 ) / 2 + 132;
				}
			}
			
			GetMinMaxBBox( block, minColor, maxColor );
			InsetYCoCgAlpaBBox( minColor, maxColor );
			SelectColorsDiagonal( block, minColor, maxColor );
			
			EmitByte( maxColor[3] );
			EmitByte( minColor[3] );
			
			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
			
#ifdef NVIDIA_7X_HARDWARE_BUG_FIX
			// the colors are already sorted when selecting the diagonal
#endif
			
			EmitUShort( ColorTo565( maxColor ) );
			EmitUShort( ColorTo565( minColor ) );
			
			EmitColorIndices( block, minColor, maxColor );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressYCoCgCTX1DXT5AFast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressYCoCgCTX1DXT5AFast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte minColor[4] );
	ALIGN16( byte maxColor[4] );
	
	assert( HasConstantValuePer4x4Block( inBuf, width, height, 2 ) );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, minColor, maxColor );
			SelectYCoCgDiagonal( block, minColor, maxColor );
			InsetColorsBBox( minColor, maxColor );
			
			EmitByte( maxColor[3] );
			EmitByte( minColor[3] );
			
			EmitAlphaIndices( block, 3, minColor[3], maxColor[3] );
			
			EmitByte( maxColor[0] );
			EmitByte( maxColor[1] );
			EmitByte( minColor[0] );
			EmitByte( minColor[1] );
			
			EmitCTX1Indices( block, minColor, maxColor );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::EmitGreenIndices

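params:	block		- block for which to find green indices
params:	offset		- byte offset of the channel to encode within each 4 byte pixel
params:	minGreen	- min green of the block (input)
params:	maxGreen	- max green of the block (input, must be >= minGreen)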
========================
*/
void idDxtEncoder::EmitGreenIndices( const byte* block, const int offset, const byte minGreen, const byte maxGreen )
{
	// Assigns a 2 bit index to one channel of each of the 16 pixels in block and emits
	// the packed indices through EmitUInt. The channel is selected by 'offset', pixels
	// are 4 bytes apart. Two alternative index selections are kept below; only the
	// first one is compiled.

	assert( maxGreen >= minGreen );
	
	const int COLOR_RANGE = 3;
	
#if 1
	
	// compiled variant: each threshold is the rounded midpoint between two neighboring
	// values of the four step ramp from maxGreen down to minGreen (the ramp itself is
	// spelled out in the disabled variant below)
	byte yb1 = ( 5 * maxGreen + 1 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
	byte yb2 = ( 3 * maxGreen + 3 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
	byte yb3 = ( 1 * maxGreen + 5 * minGreen + COLOR_RANGE ) / ( 2 * COLOR_RANGE );
	
	unsigned int result = 0;
	
	block += offset;
	
	// walk the pixels from last to first so that pixel 0 ends up in the lowest bits
	for( int i = 15; i >= 0; i-- )
	{
		result <<= 2;
		byte y = block[i * 4];
		int b1 = ( y >= yb1 );
		int b2 = ( y >= yb2 );
		int b3 = ( y >= yb3 );
		// 4 minus the number of thresholds reached, wrapped to 2 bits: all three
		// reached gives 1, none reached gives 0, otherwise 2 or 3
		int index = ( 4 - b1 - b2 - b3 ) & 3;
		// swap 0 and 1 so that index 0 selects maxGreen and index 1 selects minGreen
		index ^= ( 2 > index );
		result |= index;
	}
	
	EmitUInt( result );
	
#else
	
	// disabled variant: build the four entry ramp and pick, for every pixel, the first
	// entry with the smallest absolute difference
	byte green[4];
	
	green[0] = maxGreen;
	green[1] = minGreen;
	green[2] = ( 2 * green[0] + 1 * green[1] ) / 3;
	green[3] = ( 1 * green[0] + 2 * green[1] ) / 3;
	
	unsigned int result = 0;
	
	block += offset;
	
	for( int i = 15; i >= 0; i-- )
	{
		result <<= 2;
		byte y = block[i * 4];
		int minDist = INT_MAX;
		int index;
		for( int j = 0; j < 4; j++ )
		{
			int dist = abs( y - green[j] );
			if( dist < minDist )
			{
				minDist = dist;
				index = j;
			}
		}
		result |= index;
	}
	
	EmitUInt( result );
	
#endif
}

/*
========================
idDxtEncoder::InsetNormalsBBoxDXT5
========================
*/
void idDxtEncoder::InsetNormalsBBoxDXT5( byte* minNormal, byte* maxNormal ) const
{
	// Pulls the bounds of channel 3 (INSET_ALPHA_SHIFT) and channel 1
	// (INSET_COLOR_SHIFT) inwards by a fraction of their range. Channels 0 and 2 are
	// left untouched.
	
	// range of each channel minus a bias of ( 1 << ( shift - 1 ) ) - 1
	const int alphaInset = ( maxNormal[3] - minNormal[3] ) - ( ( 1 << ( INSET_ALPHA_SHIFT - 1 ) ) - 1 );
	const int greenInset = ( maxNormal[1] - minNormal[1] ) - ( ( 1 << ( INSET_COLOR_SHIFT - 1 ) ) - 1 );
	
	// move the bounds by inset / 2^shift using 'shift' fractional bits
	int alphaLow = ( ( minNormal[3] << INSET_ALPHA_SHIFT ) + alphaInset ) >> INSET_ALPHA_SHIFT;
	int greenLow = ( ( minNormal[1] << INSET_COLOR_SHIFT ) + greenInset ) >> INSET_COLOR_SHIFT;
	
	int alphaHigh = ( ( maxNormal[3] << INSET_ALPHA_SHIFT ) - alphaInset ) >> INSET_ALPHA_SHIFT;
	int greenHigh = ( ( maxNormal[1] << INSET_COLOR_SHIFT ) - greenInset ) >> INSET_COLOR_SHIFT;
	
	// keep the results inside the byte range
	if( alphaLow < 0 )
	{
		alphaLow = 0;
	}
	if( greenLow < 0 )
	{
		greenLow = 0;
	}
	if( alphaHigh > 255 )
	{
		alphaHigh = 255;
	}
	if( greenHigh > 255 )
	{
		greenHigh = 255;
	}
	
	// channel 3 is stored as is, channel 1 is masked with the 6 bit mask and or-ed
	// with its own top bits
	minNormal[3] = byte( alphaLow );
	minNormal[1] = byte( ( greenLow & C565_6_MASK ) | ( greenLow >> 6 ) );
	
	maxNormal[3] = byte( alphaHigh );
	maxNormal[1] = byte( ( greenHigh & C565_6_MASK ) | ( greenHigh >> 6 ) );
}

/*
========================
idDxtEncoder::InsetNormalsBBox3Dc
========================
*/
void idDxtEncoder::InsetNormalsBBox3Dc( byte* minNormal, byte* maxNormal ) const
{
	// Pulls the bounds of channels 0 and 1 inwards by a fraction of their range, both
	// with INSET_ALPHA_SHIFT. Channels 2 and 3 are left untouched.
	const int bias = ( 1 << ( INSET_ALPHA_SHIFT - 1 ) ) - 1;
	
	int lowBound[2];
	int highBound[2];
	
	for( int ch = 0; ch < 2; ch++ )
	{
		const int inset = ( maxNormal[ch] - minNormal[ch] ) - bias;
		
		// move the bounds by inset / 2^shift using INSET_ALPHA_SHIFT fractional bits
		int low = ( ( minNormal[ch] << INSET_ALPHA_SHIFT ) + inset ) >> INSET_ALPHA_SHIFT;
		int high = ( ( maxNormal[ch] << INSET_ALPHA_SHIFT ) - inset ) >> INSET_ALPHA_SHIFT;
		
		// keep the results inside the byte range
		if( low < 0 )
		{
			low = 0;
		}
		if( high > 255 )
		{
			high = 255;
		}
		
		lowBound[ch] = low;
		highBound[ch] = high;
	}
	
	for( int ch = 0; ch < 2; ch++ )
	{
		minNormal[ch] = ( byte )lowBound[ch];
		maxNormal[ch] = ( byte )highBound[ch];
	}
}

/*
========================
idDxtEncoder::CompressNormalMapDXT5Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXT5Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte normal1[4] );
	ALIGN16( byte normal2[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, normal1, normal2 );
			InsetNormalsBBoxDXT5( normal1, normal2 );
			
			// Write out Nx into alpha channel.
			EmitByte( normal2[3] );
			EmitByte( normal1[3] );
			EmitAlphaIndices( block, 3, normal1[3], normal2[3] );
			
			// Write out Ny into green channel.
			EmitUShort( ColorTo565( block[0], normal2[1], block[2] ) );
			EmitUShort( ColorTo565( block[0], normal1[1], block[2] ) );
			EmitGreenIndices( block, 1, normal1[1], normal2[1] );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressImageDXN1Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressImageDXN1Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte min[4] );
	ALIGN16( byte max[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, min, max );
			InsetNormalsBBox3Dc( min, max );
			
			// Write out an alpha channel.
			EmitByte( max[0] );
			EmitByte( min[0] );
			EmitAlphaIndices( block, 0, min[0], max[0] );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::CompressNormalMapDXN2Fast_Generic

params:	inBuf		- image to compress
paramO:	outBuf		- result of compression
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::CompressNormalMapDXN2Fast_Generic( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte block[64] );
	ALIGN16( byte normal1[4] );
	ALIGN16( byte normal2[4] );
	
	assert( width >= 4 && ( width & 3 ) == 0 );
	assert( height >= 4 && ( height & 3 ) == 0 );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	for( int j = 0; j < height; j += 4, inBuf += width * 4 * 4 )
	{
		for( int i = 0; i < width; i += 4 )
		{
		
			ExtractBlock( inBuf + i * 4, width, block );
			
			GetMinMaxBBox( block, normal1, normal2 );
			InsetNormalsBBox3Dc( normal1, normal2 );
			
			// Write out Nx as an alpha channel.
			EmitByte( normal2[0] );
			EmitByte( normal1[0] );
			EmitAlphaIndices( block, 0, normal1[0], normal2[0] );
			
			// Write out Ny as an alpha channel.
			EmitByte( normal2[1] );
			EmitByte( normal1[1] );
			EmitAlphaIndices( block, 1, normal1[1], normal2[1] );
		}
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::DecodeDXNAlphaValues
========================
*/
void idDxtEncoder::DecodeDXNAlphaValues( const byte* inBuf, byte* values )
{
	int i;
	unsigned int indices;
	byte alphas[8];
	
	if( inBuf[0] <= inBuf[1] )
	{
		alphas[0] = inBuf[0];
		alphas[1] = inBuf[1];
		alphas[2] = ( 4 * alphas[0] + 1 * alphas[1] ) / 5;
		alphas[3] = ( 3 * alphas[0] + 2 * alphas[1] ) / 5;
		alphas[4] = ( 2 * alphas[0] + 3 * alphas[1] ) / 5;
		alphas[5] = ( 1 * alphas[0] + 4 * alphas[1] ) / 5;
		alphas[6] = 0;
		alphas[7] = 255;
	}
	else
	{
		alphas[0] = inBuf[0];
		alphas[1] = inBuf[1];
		alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
		alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
		alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
		alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
		alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
		alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
	}
	
	indices = ( int )inBuf[2] | ( ( int )inBuf[3] << 8 ) | ( ( int )inBuf[4] << 16 );
	for( i = 0; i < 8; i++ )
	{
		values[i] = alphas[indices & 7];
		indices >>= 3;
	}
	
	indices = ( int )inBuf[5] | ( ( int )inBuf[6] << 8 ) | ( ( int )inBuf[7] << 16 );
	for( i = 8; i < 16; i++ )
	{
		values[i] = alphas[indices & 7];
		indices >>= 3;
	}
}

/*
========================
idDxtEncoder::EncodeNormalRGBIndices

Packs 16 single-channel values into 8 bytes: two 16-bit end point colors
(the one built from max first, then the one built from min) followed by
sixteen 2-bit selectors.

paramO:	outBuf	- receives the 8 encoded bytes
params:	min		- lower end point value
params:	max		- upper end point value
params:	values	- the 16 values to encode
========================
*/
void idDxtEncoder::EncodeNormalRGBIndices( byte* outBuf, const byte min, const byte max, const byte* values )
{
	const int COLOR_RANGE = 3;
	
	// end points reduced to the bits kept by C565_6_MASK
	const byte hi6 = max & C565_6_MASK;
	const byte lo6 = min & C565_6_MASK;
	
	// one sixth of the masked range, subtracted from each palette level to form the thresholds
	const byte sixth = ( hi6 - lo6 ) / ( 2 * COLOR_RANGE );
	
	const byte threshold1 = hi6 - sixth;
	const byte threshold2 = ( 2 * hi6 + 1 * lo6 ) / COLOR_RANGE - sixth;
	const byte threshold3 = ( 1 * hi6 + 2 * lo6 ) / COLOR_RANGE - sixth;
	
	// Texel n occupies bits 2n and 2n+1 of the selector word.
	unsigned int selectors = 0;
	for( int texel = 0; texel < 16; texel++ )
	{
		const byte y = values[texel];
		const int passed = ( y >= threshold1 ) + ( y >= threshold2 ) + ( y >= threshold3 );
		
		// passed 3,2,1,0 -> selector 0,2,3,1
		int index = ( 4 - passed ) & 3;
		index ^= ( 2 > index );
		
		selectors |= ( unsigned int )index << ( 2 * texel );
	}
	
	// The 5-bit value fills bits 11..15 and 0..4, the masked value shifted by 3 sits in between.
	const unsigned short hi5 = ( max & C565_5_MASK ) >> 3;
	const unsigned short lo5 = ( min & C565_5_MASK ) >> 3;
	
	const unsigned short colorHi = ( hi5 << 11 ) | ( hi6 << 3 ) | hi5;
	const unsigned short colorLo = ( lo5 << 11 ) | ( lo6 << 3 ) | lo5;
	
	// end point colors, low byte first
	outBuf[0] = byte( ( colorHi >> 0 ) & 0xFF );
	outBuf[1] = byte( ( colorHi >> 8 ) & 0xFF );
	outBuf[2] = byte( ( colorLo >> 0 ) & 0xFF );
	outBuf[3] = byte( ( colorLo >> 8 ) & 0xFF );
	
	// selector word, low byte first
	for( int b = 0; b < 4; b++ )
	{
		outBuf[4 + b] = byte( ( selectors >> ( 8 * b ) ) & 0xFF );
	}
}

/*
========================
idDxtEncoder::ConvertNormalMapDXN2_DXT5

Converts 16-byte DXN2 blocks into 16-byte DXT5 blocks. The first 8 source
bytes are decoded and re-encoded into the color half of the output
(bytes 8..15); the second 8 source bytes are copied unchanged into the
alpha half of the output (bytes 0..7).

Returns without writing anything if a dimension is smaller than 4, or is
larger than 4 and not a multiple of 4.

params:	inBuf		- normal map compressed in DXN2 format
paramO:	outBuf		- result of compression in DXT5 format
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::ConvertNormalMapDXN2_DXT5( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte values[16] );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		assert( 0 );
		return;
	}
	
	for( int j = 0; j < height; j += 4 )
	{
		for( int i = 0; i < width; i += 4, inBuf += 16, outBuf += 16 )
		{
		
			// decode the channel stored in the first 8 source bytes
			// NOTE(review): this code names that channel "normal Y" and the copied one
			// "normal X", while CompressNormalMapDXN2Fast_Generic labels the first
			// block Nx and the second Ny - confirm which naming is intended.
			DecodeDXNAlphaValues( inBuf + 0, values );
			
			// the second 8 source bytes become the DXT5 alpha block unchanged
			memcpy( outBuf + 0, inBuf + 8, 8 );
			
			// get the min/max of the decoded channel
			byte minNormalY = 255;
			byte maxNormalY = 0;
			for( int k = 0; k < 16; k++ )
			{
				if( values[k] < minNormalY )
				{
					minNormalY = values[k];
				}
				if( values[k] > maxNormalY )
				{
					maxNormalY = values[k];
				}
			}
			
			// encode the decoded channel into the DXT5 color block
			EncodeNormalRGBIndices( outBuf + 8, minNormalY, maxNormalY, values );
		}
		
		// All writes go through the local outBuf, so it has to skip the destination
		// row padding as well; outData is still advanced exactly as before.
		outBuf += dstPadding;
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::DecodeNormalYValues
========================
*/
void idDxtEncoder::DecodeNormalYValues( const byte* inBuf, byte& min, byte& max, byte* values )
{
	int i;
	unsigned int indexes;
	unsigned short normal0, normal1;
	byte normalsY[4];
	
	normal0 = inBuf[0] | ( inBuf[1] << 8 );
	normal1 = inBuf[2] | ( inBuf[3] << 8 );
	
	assert( normal0 >= normal1 );
	
	normalsY[0] = GreenFrom565( normal0 );
	normalsY[1] = GreenFrom565( normal1 );
	normalsY[2] = ( 2 * normalsY[0] + 1 * normalsY[1] ) / 3;
	normalsY[3] = ( 1 * normalsY[0] + 2 * normalsY[1] ) / 3;
	
	indexes = ( unsigned int )inBuf[4] | ( ( unsigned int )inBuf[5] << 8 ) | ( ( unsigned int )inBuf[6] << 16 ) | ( ( unsigned int )inBuf[7] << 24 );
	for( i = 0; i < 16; i++ )
	{
		values[i] = normalsY[indexes & 3];
		indexes >>= 2;
	}
	
	max = normalsY[0];
	min = normalsY[1];
}

/*
========================
idDxtEncoder::EncodeDXNAlphaValues

Encodes 16 values as an 8-byte interpolated-alpha block: max, min, then
sixteen 3-bit indices into the eight-entry palette built from them.

NOTE(review): DecodeDXNAlphaValues switches to its six-entry palette when the
first end point is not greater than the second, so this presumably relies on
callers passing max >= min - confirm.

paramO:	outBuf		- receives the 8 encoded bytes
params:	min			- lower end point
params:	max			- upper end point
params:	values		- the 16 values to encode
========================
*/
void idDxtEncoder::EncodeDXNAlphaValues( byte* outBuf, const byte min, const byte max, const byte* values )
{
	byte alphas[8];
	unsigned int indexes[16];
	
	// eight-entry palette: the end points plus six weighted blends
	alphas[0] = max;
	alphas[1] = min;
	alphas[2] = ( 6 * alphas[0] + 1 * alphas[1] ) / 7;
	alphas[3] = ( 5 * alphas[0] + 2 * alphas[1] ) / 7;
	alphas[4] = ( 4 * alphas[0] + 3 * alphas[1] ) / 7;
	alphas[5] = ( 3 * alphas[0] + 4 * alphas[1] ) / 7;
	alphas[6] = ( 2 * alphas[0] + 5 * alphas[1] ) / 7;
	alphas[7] = ( 1 * alphas[0] + 6 * alphas[1] ) / 7;
	
	// pick the closest palette entry for every value; ties keep the lowest index
	for( int i = 0; i < 16; i++ )
	{
		int minDist = MAX_TYPE( int );
		unsigned int best = 0;	// defined even if no distance compares below minDist
		const byte a = values[i];
		for( int j = 0; j < 8; j++ )
		{
			const int dist = AlphaDistance( a, alphas[j] );
			if( dist < minDist )
			{
				minDist = dist;
				best = j;
			}
		}
		indexes[i] = best;
	}
	
	outBuf[0] = max;
	outBuf[1] = min;
	
	// pack eight 3-bit indices into each group of three bytes, lowest bits first
	outBuf[2] = byte( ( indexes[ 0] >> 0 ) | ( indexes[ 1] << 3 ) | ( indexes[ 2] << 6 ) );
	outBuf[3] = byte( ( indexes[ 2] >> 2 ) | ( indexes[ 3] << 1 ) | ( indexes[ 4] << 4 ) | ( indexes[ 5] << 7 ) );
	outBuf[4] = byte( ( indexes[ 5] >> 1 ) | ( indexes[ 6] << 2 ) | ( indexes[ 7] << 5 ) );
	
	outBuf[5] = byte( ( indexes[ 8] >> 0 ) | ( indexes[ 9] << 3 ) | ( indexes[10] << 6 ) );
	outBuf[6] = byte( ( indexes[10] >> 2 ) | ( indexes[11] << 1 ) | ( indexes[12] << 4 ) | ( indexes[13] << 7 ) );
	outBuf[7] = byte( ( indexes[13] >> 1 ) | ( indexes[14] << 2 ) | ( indexes[15] << 5 ) );
}

/*
========================
idDxtEncoder::ConvertNormalMapDXT5_DXN2

Converts 16-byte DXT5 blocks into 16-byte DXN2 blocks. The green channel of
the color half (source bytes 8..15) is decoded and re-encoded into the first
8 output bytes; the alpha half (source bytes 0..7) is copied unchanged into
the second 8 output bytes.

Returns without writing anything if a dimension is smaller than 4, or is
larger than 4 and not a multiple of 4.

params:	inBuf		- normal map compressed in DXT5 format
paramO:	outBuf		- result of conversion in DXN2 format
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::ConvertNormalMapDXT5_DXN2( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte values[16] );
	byte minNormalY, maxNormalY;
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		assert( 0 );
		return;
	}
	
	for( int j = 0; j < height; j += 4 )
	{
		for( int i = 0; i < width; i += 4, inBuf += 16, outBuf += 16 )
		{
		
			// decode normal Y stored in the green channel of the DXT5 color block
			DecodeNormalYValues( inBuf + 8, minNormalY, maxNormalY, values );
			
			// the DXT5 alpha block becomes the second DXN2 block unchanged
			memcpy( outBuf + 8, inBuf + 0, 8 );
			
			// encode normal Y as the first DXN2 block
			EncodeDXNAlphaValues( outBuf + 0, minNormalY, maxNormalY, values );
		}
		
		// All writes go through the local outBuf, so it has to skip the destination
		// row padding as well; outData is still advanced exactly as before.
		outBuf += dstPadding;
		outData += dstPadding;
		inBuf += srcPadding;
	}
}

/*
========================
idDxtEncoder::ConvertImageDXN1_DXT1

Converts 8-byte DXN1 blocks into 8-byte DXT1 blocks by decoding the single
channel and re-encoding it with EncodeNormalRGBIndices.

Returns without writing anything if a dimension is smaller than 4, or is
larger than 4 and not a multiple of 4.

params:	inBuf		- image compressed in DXN1 format
paramO:	outBuf		- result of compression in DXT1 format
params:	width		- width of image
params:	height		- height of image
========================
*/
void idDxtEncoder::ConvertImageDXN1_DXT1( const byte* inBuf, byte* outBuf, int width, int height )
{
	ALIGN16( byte values[16] );
	
	this->width = width;
	this->height = height;
	this->outData = outBuf;
	
	if( width > 4 && ( width & 3 ) != 0 )
	{
		return;
	}
	if( height > 4 && ( height & 3 ) != 0 )
	{
		return;
	}
	
	if( width < 4 || height < 4 )
	{
		assert( 0 );
		return;
	}
	
	for( int j = 0; j < height; j += 4 )
	{
		for( int i = 0; i < width; i += 4, inBuf += 8, outBuf += 8 )
		{
		
			// decode the single channel stored as an interpolated-alpha block
			DecodeDXNAlphaValues( inBuf + 0, values );
			
			// get the min/max of the decoded values
			byte min = 255;
			byte max = 0;
			for( int k = 0; k < 16; k++ )
			{
				if( values[k] < min )
				{
					min = values[k];
				}
				if( values[k] > max )
				{
					max = values[k];
				}
			}
			
			// encode single channel into DXT1
			EncodeNormalRGBIndices( outBuf + 0, min, max, values );
		}
		
		// All writes go through the local outBuf, so it has to skip the destination
		// row padding as well; outData is still advanced exactly as before.
		outBuf += dstPadding;
		outData += dstPadding;
		inBuf += srcPadding;
	}
}