// Copyright (C) 2007 Id Software, Inc.
//

#include "../precompiled.h"
#pragma hdrstop

// True when bit 7 of value is clear (0xxxxxxx): the byte is a complete single-byte UTF-8 sequence.
template< typename T >
ID_INLINE bool Is1ByteUTF8SequenceStart( const T value ) {
	return !( value & 0x80 );
}
// True when the top three bits of value are 110 (110xxxxx): the lead byte of a two-byte UTF-8 sequence.
template< typename T >
ID_INLINE bool Is2ByteUTF8SequenceStart( const T value ) {
	const int topBits = value & 0xE0;
	return topBits == 0xC0;
}
// True when the top four bits of value are 1110 (1110xxxx): the lead byte of a three-byte UTF-8 sequence.
template< typename T >
ID_INLINE bool Is3ByteUTF8SequenceStart( const T value ) {
	const int topBits = value & 0xF0;
	return topBits == 0xE0;
}
// True when the top two bits of value are 10 (10xxxxxx), i.e. value is a continuation (trailing) byte.
// Despite the name this inspects a single byte only; it does not validate a whole sequence.
template< typename T >
ID_INLINE bool IsValidUTF8Sequence( const T value ) {
	const int topBits = value & 0xC0;
	return topBits == 0x80;
}

// Encoder-side test: true when value is above 0x7FF, the largest value the two-byte form can carry.
template< typename T >
ID_INLINE bool Is3ByteUTF8Sequence( const T value ) {
	const int largestTwoByteValue = 0x7FF;
	return value > largestTwoByteValue;
}
// Encoder-side test: true when value is above 0x7F, the largest value a single byte can carry.
// This is also true for values above 0x7FF, so callers must test Is3ByteUTF8Sequence first.
template< typename T >
ID_INLINE bool Is2ByteUTF8Sequence( const T value ) {
	const int largestOneByteValue = 0x7F;
	return value > largestOneByteValue;
}

/*
============
sdUTF8::sdUTF8

Constructs the object from a file: requests a buffer of file->Length() bytes
and reads that many bytes from the file into it.
============
*/
sdUTF8::sdUTF8( idFile* file ) {
	Init();

	EnsureAlloced( file->Length() );

	// NOTE(review): the stored length is taken from alloced, which presumably equals
	// the size just requested - confirm against EnsureAlloced.
	this->len = this->alloced;

	// the result of Read is not checked
	file->Read( this->data, this->len );
}

/*
============
sdUTF8::sdUTF8

Constructs the object from a copy of a caller-supplied byte buffer.

data	source bytes; copied, the pointer is not retained
size	number of bytes requested for the internal buffer
============
*/
sdUTF8::sdUTF8( const byte* data, const int size ) {
	Init();

	EnsureAlloced( size );

	// NOTE(review): the copy length comes from alloced rather than size; this presumably
	// equals size after EnsureAlloced - confirm, otherwise the copy would read past the source.
	this->len = this->alloced;

	// the data parameter shadows the member, so the destination is spelled with this->
	::memcpy( this->data, data, this->len );
}

/*
============
sdUTF8::DecodeLength
============
*/
int sdUTF8::DecodeLength() const {
	// count the number of characters in the UTF-8 data
	int length = 0;

	const byte* ptr = data;
	
	while ( ( ptr - data ) < len ) {
		if ( Is1ByteUTF8SequenceStart( *ptr ) ) {
			length++;
			ptr += 1;
			continue;
		} else if ( Is2ByteUTF8SequenceStart( *ptr ) && IsValidUTF8Sequence( *(ptr + 1) ) ) {
			length++;
			ptr += 2;
			continue;
		} else if ( Is3ByteUTF8SequenceStart( *ptr ) && IsValidUTF8Sequence( *(ptr + 1) ) && IsValidUTF8Sequence( *(ptr + 2) ) ) {
			length++;
			ptr += 3;
			continue;
		} else {
			// malformed UTF-8 data
			//assert( false );
			length++;
			ptr += 1; 
		}
	}

	return length;
}

/*
============
sdUTF8::Decode

Converts the UTF-8 buffer into wide characters written to 'to', followed by a
terminating L'\0'. Returns the number of characters written, not counting the terminator.

Decoding stops at the first sequence UTF8toUCS2 rejects (negative return), so the
output may be shorter than the input implies. 'to' must have room for every decoded
character plus the terminator.
============
*/
int sdUTF8::Decode( wchar_t* to ) const {
	wchar_t* out = to;
	int offset = 0;

	while ( offset < len ) {
		const int consumed = UTF8toUCS2( data + offset, len - offset, out );

		// truncated or malformed input: give up and terminate what we have so far
		if ( consumed < 0 ) {
			break;
		}

		offset += consumed;
		++out;
	}

	*out = L'\0';

	return ( out - to );
}


/*
============
sdUTF8::Encode

Writes len wide characters from data to file as UTF-8, one byte at a time
through idFile::WriteUnsignedChar.

Values up to 0x7F are written as a single byte, values up to 0x7FF as two bytes
and everything above that as three bytes.

NOTE(review): there is no four-byte form, and the first byte of the three-byte form is
built from an unmasked shift, so values above 0xFFFF (possible where wchar_t is wider
than 16 bits) would not round-trip - confirm whether such input can reach here.
============
*/
void sdUTF8::Encode( idFile* file, const wchar_t* data, int len ) {
	for ( int index = 0; index < len; index++ ) {
		const wchar_t ch = data[ index ];

		// fits in seven bits: emitted unchanged
		if ( !Is2ByteUTF8Sequence( ch ) ) {
			file->WriteUnsignedChar( ch );
			continue;
		}

		// fits in eleven bits: 110xxxxx 10xxxxxx
		if ( !Is3ByteUTF8Sequence( ch ) ) {
			file->WriteUnsignedChar( 0xC0 | ( ( ch >> 6 ) & 0x1F ) );
			file->WriteUnsignedChar( 0x80 | ( ch & 0x3F ) );
			continue;
		}

		// everything else: 1110xxxx 10xxxxxx 10xxxxxx
		file->WriteUnsignedChar( 0xE0 | ( ch >> 12 ) );
		file->WriteUnsignedChar( 0x80 | ( ( ch >> 6 ) & 0x3F ) );
		file->WriteUnsignedChar( 0x80 | ( ch & 0x3F ) );
	}
}

/*
============
sdUTF8::UTF8toUCS2

Decodes the single UTF-8 sequence starting at data into *ucs2.

data	first byte of the sequence
len		number of bytes available from data
ucs2	receives the decoded character; left untouched on failure

Returns the number of bytes consumed (1, 2 or 3) on success,
-1 if fewer bytes are available than the lead byte requires,
-2 if a continuation byte is invalid or the lead byte is not a 1, 2 or 3 byte lead.
============
*/
int sdUTF8::UTF8toUCS2( const byte* data, const int len, wchar_t* ucs2 ) const {
	if ( len < 1 ) {
		return -1;
	}

	const wchar_t lead = static_cast< wchar_t >( data[ 0 ] );

	// 0xxxxxxx
	if ( Is1ByteUTF8SequenceStart( lead ) ) {
		*ucs2 = ( lead & 0x7F );
		return 1;
	}

	// 110xxxxx 10xxxxxx
	if ( Is2ByteUTF8SequenceStart( lead ) ) {
		if ( len < 2 ) {
			return -1;
		}

		const wchar_t trail = static_cast< wchar_t >( data[ 1 ] );
		if ( !IsValidUTF8Sequence( trail ) ) {
			return -2;
		}

		*ucs2 = ( ( lead & 0x1F ) << 6 ) | ( trail & 0x3F );
		return 2;
	}

	// 1110xxxx 10xxxxxx 10xxxxxx
	if ( Is3ByteUTF8SequenceStart( lead ) ) {
		if ( len < 3 ) {
			return -1;
		}

		const wchar_t trail1 = static_cast< wchar_t >( data[ 1 ] );
		const wchar_t trail2 = static_cast< wchar_t >( data[ 2 ] );
		if ( !IsValidUTF8Sequence( trail1 ) || !IsValidUTF8Sequence( trail2 ) ) {
			return -2;
		}

		*ucs2 = ( ( lead & 0x0F ) << 12 ) | ( ( trail1 & 0x3F ) << 6 ) | ( trail2 & 0x3F );
		return 3;
	}

	// stray continuation byte or a lead byte of a longer form
	return -2;
}