raze-gles/source/build/src/fix16.c

#include "fix16.h"
#include "fix16_int64.h"

/* Subtraction and addition with overflow detection.
 * The versions without overflow detection are inlined in the header.
 */
#ifndef FIXMATH_NO_OVERFLOW
/* Overflow-checked addition.
 * Returns the 32-bit sum of a and b, or fix16_overflow when both operands
 * carry the same sign but the sum's sign differs from it.
 */
fix16_t fix16_add(fix16_t a, fix16_t b)
{
	// Work on unsigned copies: unsigned wrap-around is well defined, while
	// signed overflow is undefined (http://www.airs.com/blog/archives/120).
	const uint32_t lhs = a;
	const uint32_t rhs = b;
	const uint32_t total = lhs + rhs;

	const uint32_t operands_differ = (lhs ^ rhs) & 0x80000000;
	const uint32_t sign_flipped = (lhs ^ total) & 0x80000000;

	// Operands of opposite sign can never overflow when added.
	if (operands_differ)
		return total;

	// Same-signed operands overflowed if the result changed sign.
	if (sign_flipped)
		return fix16_overflow;

	return total;
}

/* Overflow-checked subtraction.
 * Returns the 32-bit difference a - b, or fix16_overflow when the operands
 * have different signs and the difference's sign differs from a's.
 */
fix16_t fix16_sub(fix16_t a, fix16_t b)
{
	// Unsigned arithmetic keeps the wrap-around well defined.
	const uint32_t lhs = a;
	const uint32_t rhs = b;
	const uint32_t delta = lhs - rhs;

	// Top bit of (lhs ^ rhs):   operands differ in sign.
	// Top bit of (lhs ^ delta): result sign differs from a.
	// Overflow requires both at once.
	if (((lhs ^ rhs) & (lhs ^ delta)) & 0x80000000)
		return fix16_overflow;

	return delta;
}

/* Saturating arithmetic */
/* Saturating addition.
 * Returns a + b, clamped to fix16_maximum or fix16_minimum when fix16_add()
 * reports fix16_overflow.
 */
fix16_t fix16_sadd(fix16_t a, fix16_t b)
{
	fix16_t result = fix16_add(a, b);

	// A genuine overflow only happens when a and b share a sign, so it
	// saturates upward exactly when both are non-negative. Looking at both
	// operands (rather than a alone) also covers a legitimate sum that happens
	// to equal the sentinel, e.g. 0 + fix16_minimum, which must not clamp to
	// fix16_maximum.
	// NOTE(review): assumes fix16_overflow has the same value as
	// fix16_minimum, as in upstream libfixmath - confirm against fix16.h.
	if (result == fix16_overflow)
		return (a >= 0 && b >= 0) ? fix16_maximum : fix16_minimum;

	return result;
}

/* Saturating subtraction.
 * Returns a - b, or fix16_maximum / fix16_minimum (chosen by the sign of a)
 * when fix16_sub() yields fix16_overflow.
 */
fix16_t fix16_ssub(fix16_t a, fix16_t b)
{
	const fix16_t diff = fix16_sub(a, b);

	if (diff != fix16_overflow)
		return diff;

	// Clamp towards the side a is on.
	if (a >= 0)
		return fix16_maximum;

	return fix16_minimum;
}
#endif


/* 64-bit implementation for fix16_mul. Fastest version for e.g. ARM Cortex M3.
 * Performs a 32*32 -> 64bit multiplication. The middle 32 bits are the result,
 * bottom 16 bits are used for rounding, and upper 16 bits are used for overflow
 * detection.
 *
 * Returns the product shifted right by 16 bits. Unless FIXMATH_NO_OVERFLOW is
 * defined, returns fix16_overflow when the upper bits of the 64-bit product
 * are not a pure sign extension. Unless FIXMATH_NO_ROUNDING is defined, the
 * result is rounded using bit 15 of the product.
 */

fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)
{
	int64_t product = (int64_t)inArg0 * inArg1;

	#ifndef FIXMATH_NO_OVERFLOW
	// The upper 17 bits should all be the same (the sign).
	uint32_t upper = (product >> 47);
	#endif

	if (product < 0)
	{
		#ifndef FIXMATH_NO_OVERFLOW
		if (~upper)
				return fix16_overflow;
		#endif

		#ifndef FIXMATH_NO_ROUNDING
		// This adjustment is required in order to round -1/2 correctly
		product--;
		#endif
	}
	else
	{
		#ifndef FIXMATH_NO_OVERFLOW
		if (upper)
				return fix16_overflow;
		#endif
	}

	#ifdef FIXMATH_NO_ROUNDING
	return product >> 16;
	#else
	// Apply the rounding increment while still in 64 bits. Adding it to an
	// already narrowed 32-bit signed value overflows (undefined behaviour)
	// when the shifted product is the largest positive 32-bit value and the
	// rounding bit is set.
	int64_t rounded = (product >> 16) + ((product >> 15) & 1);

	// Narrow through uint32_t so the low 32 bits are kept as-is.
	return (fix16_t)(uint32_t)rounded;
	#endif
}

#ifndef FIXMATH_NO_OVERFLOW
/* Saturating multiply built on fix16_mul.
 * When fix16_mul() yields fix16_overflow the result is clamped: operands with
 * matching signs give fix16_maximum, operands with differing signs give
 * fix16_minimum. Any other result is passed through unchanged.
 */
fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1)
{
	const fix16_t product = fix16_mul(inArg0, inArg1);

	if (product != fix16_overflow)
		return product;

	const int lhs_negative = (inArg0 < 0);
	const int rhs_negative = (inArg1 < 0);

	return (lhs_negative == rhs_negative) ? fix16_maximum : fix16_minimum;
}
#endif

/* 32-bit implementation of fix16_div. Fastest version for e.g. ARM Cortex M3.
 * Performs 32-bit divisions repeatedly to reduce the remainder. For this to
 * be efficient, the processor has to have 32-bit hardware division.
 */
#ifdef __GNUC__
// Leading-zero count of a 32-bit value via the compiler builtin. The builtin
// counts within a long, so the extra high bits of a wider long are subtracted.
// As with the builtin itself, the result for x == 0 is not defined.
#define clz(x) (__builtin_clzl(x) - (sizeof(long) * 8 - 32))
#else
// Portable fallback: number of leading zero bits in x (32 when x is zero).
static uint8_t clz(uint32_t x)
{
	uint8_t zeros;

	if (x == 0)
		return 32;

	// Skip whole empty nibbles first, then finish one bit at a time.
	for (zeros = 0; (x & 0xF0000000) == 0; x <<= 4)
		zeros += 4;
	for (; (x & 0x80000000) == 0; x <<= 1)
		zeros += 1;

	return zeros;
}
#endif

/* Divides a by b.
 * Returns fix16_minimum when b is zero. Unless FIXMATH_NO_OVERFLOW is
 * defined, returns fix16_overflow when a partial quotient does not fit.
 * Unless FIXMATH_NO_ROUNDING is defined, the quotient is rounded using one
 * extra computed bit. The division runs on the operands' magnitudes; the sign
 * is applied at the end.
 */
fix16_t fix16_div(fix16_t a, fix16_t b)
{
	// This uses a hardware 32/32 bit division multiple times, until we have
	// computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.

	if (b == 0)
			return fix16_minimum;

	// Take the magnitudes in unsigned arithmetic: negating the most negative
	// value as a signed integer overflows, which is undefined behaviour.
	uint32_t remainder = (a >= 0) ? (uint32_t)a : (0u - (uint32_t)a);
	uint32_t divider = (b >= 0) ? (uint32_t)b : (0u - (uint32_t)b);
	uint32_t quotient = 0;
	int bit_pos = 17;

	// Kick-start the division a bit.
	// This improves speed in the worst-case scenarios where N and D are large
	// It gets a lower estimate for the result by N/(D >> 17 + 1).
	if (divider & 0xFFF00000)
	{
		uint32_t shifted_div = ((divider >> 17) + 1);
		quotient = remainder / shifted_div;
		remainder -= ((uint64_t)quotient * divider) >> 17;
	}

	// If the divider is divisible by 2^n, take advantage of it.
	while (!(divider & 0xF) && bit_pos >= 4)
	{
		divider >>= 4;
		bit_pos -= 4;
	}

	// Long division: each pass produces as many quotient bits as the
	// remainder can be shifted without losing its top bit.
	while (remainder && bit_pos >= 0)
	{
		// Shift remainder as much as we can without overflowing
		int shift = clz(remainder);
		if (shift > bit_pos) shift = bit_pos;
		remainder <<= shift;
		bit_pos -= shift;

		uint32_t div = remainder / divider;
		remainder = remainder % divider;
		quotient += div << bit_pos;

		#ifndef FIXMATH_NO_OVERFLOW
		// Bits of div that would be shifted out above mean the quotient
		// does not fit in 32 bits.
		if (div & ~(0xFFFFFFFF >> bit_pos))
				return fix16_overflow;
		#endif

		remainder <<= 1;
		bit_pos--;
	}

	#ifndef FIXMATH_NO_ROUNDING
	// Quotient is always positive so rounding is easy
	quotient++;
	#endif

	// Drop the extra low bit that was computed for rounding.
	fix16_t result = quotient >> 1;

	// Figure out the sign of the result
	if ((a ^ b) & 0x80000000)
	{
		#ifndef FIXMATH_NO_OVERFLOW
		if (result == fix16_minimum)
				return fix16_overflow;
		#endif

		result = -result;
	}

	return result;
}

#ifndef FIXMATH_NO_OVERFLOW
/* Saturating divide built on fix16_div.
 * A fix16_overflow result from fix16_div() is replaced by fix16_maximum when
 * the operands' signs match and by fix16_minimum when they differ; every
 * other result is returned as-is.
 */
fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1)
{
	fix16_t quotient = fix16_div(inArg0, inArg1);

	if (quotient == fix16_overflow)
	{
		// XOR of the two "is non-negative" flags is set when signs differ.
		const int signs_differ = (inArg0 >= 0) ^ (inArg1 >= 0);
		quotient = signs_differ ? fix16_minimum : fix16_maximum;
	}

	return quotient;
}
#endif

/* Remainder of x divided by y, computed with C's % operator on the raw
 * values (so a non-zero result takes the sign of x).
 * Returns x unchanged when y is zero, and 0 when y is -1.
 */
fix16_t fix16_mod(fix16_t x, fix16_t y)
{
	// x % 0 is undefined in C; hand x back instead of evaluating it.
	if (y == 0)
		return x;

	// x % -1 is always 0, but evaluating it is undefined (and traps on some
	// CPUs) when x is the most negative representable value.
	if (y == -1)
		return 0;

	return x % y;
}


#ifndef FIXMATH_NO_64BIT

/* Linear interpolation between inArg0 and inArg1 with an 8-bit fraction.
 * inArg0 is weighted by (256 - inFract) and inArg1 by inFract; the weighted
 * sum is then passed through int64_shift(..., -8) and its low word returned.
 */
fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract)
{
	int64_t weightedStart = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract));
	int64_t weightedEnd = int64_mul_i32_i32(inArg1, inFract);
	int64_t blended = int64_add(weightedStart, weightedEnd);

	// NOTE(review): a negative count presumably means a right shift here,
	// removing the 2^8 weight scale - confirm against fix16_int64.h.
	blended = int64_shift(blended, -8);

	return (fix16_t)int64_lo(blended);
}

/* Linear interpolation between inArg0 and inArg1 with a 16-bit fraction.
 * inArg0 is weighted by (65536 - inFract) and inArg1 by inFract; the weighted
 * sum is then passed through int64_shift(..., -16) and its low word returned.
 */
fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract)
{
	int64_t weightedStart = int64_mul_i32_i32(inArg0, (((int32_t)1 << 16) - inFract));
	int64_t weightedEnd = int64_mul_i32_i32(inArg1, inFract);
	int64_t blended = int64_add(weightedStart, weightedEnd);

	// NOTE(review): a negative count presumably means a right shift here,
	// removing the 2^16 weight scale - confirm against fix16_int64.h.
	blended = int64_shift(blended, -16);

	return (fix16_t)int64_lo(blended);
}

/* Linear interpolation between inArg0 and inArg1 with a 32-bit fraction.
 * inArg0 is weighted by (2^32 - inFract) and inArg1 by inFract; the 64-bit
 * weighted sum is shifted right by 32 to remove the weight scale.
 * inFract == 0 therefore returns inArg0.
 */
fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract)
{
	// Form the complementary weight in 64 bits. In 32-bit unsigned arithmetic
	// (0 - inFract) wraps to 0 when inFract is 0, which would leave both
	// endpoints with zero weight and yield 0 instead of inArg0.
	const int64_t startWeight = ((int64_t)1 << 32) - (int64_t)inFract;

	int64_t tempOut;
	tempOut  = (int64_t)inArg0 * startWeight;
	tempOut	+= (int64_t)inArg1 * (int64_t)inFract;
	tempOut >>= 32;
	return (fix16_t)tempOut;
}
#endif
Use Q16.16 fixed point for player horiz git-svn-id: https://svn.eduke32.com/eduke32@6724 1a8010ca-5511-0410-912e-c29ae57300e0 2018-03-07 04:21:05 +00:00			`#include "fix16.h"`
Rename int64.h to fix16_int64.h git-svn-id: https://svn.eduke32.com/eduke32@6848 1a8010ca-5511-0410-912e-c29ae57300e0 2018-04-23 06:35:25 +00:00			`#include "fix16_int64.h"`
Use Q16.16 fixed point for player horiz git-svn-id: https://svn.eduke32.com/eduke32@6724 1a8010ca-5511-0410-912e-c29ae57300e0 2018-03-07 04:21:05 +00:00
			`/* Subtraction and addition with overflow detection.`
			`* The versions without overflow detection are inlined in the header.`
			`*/`
			`#ifndef FIXMATH_NO_OVERFLOW`
			`fix16_t fix16_add(fix16_t a, fix16_t b)`
			`{`
			`// Use unsigned integers because overflow with signed integers is`
			`// an undefined operation (http://www.airs.com/blog/archives/120).`
			`uint32_t _a = a, _b = b;`
			`uint32_t sum = _a + _b;`

			`// Overflow can only happen if sign of a == sign of b, and then`
			`// it causes sign of sum != sign of a.`
			`if (!((_a ^ _b) & 0x80000000) && ((_a ^ sum) & 0x80000000))`
			`return fix16_overflow;`

			`return sum;`
			`}`

			`fix16_t fix16_sub(fix16_t a, fix16_t b)`
			`{`
			`uint32_t _a = a, _b = b;`
			`uint32_t diff = _a - _b;`

			`// Overflow can only happen if sign of a != sign of b, and then`
			`// it causes sign of diff != sign of a.`
			`if (((_a ^ _b) & 0x80000000) && ((_a ^ diff) & 0x80000000))`
			`return fix16_overflow;`

			`return diff;`
			`}`

			`/* Saturating arithmetic */`
			`fix16_t fix16_sadd(fix16_t a, fix16_t b)`
			`{`
			`fix16_t result = fix16_add(a, b);`

			`if (result == fix16_overflow)`
			`return (a >= 0) ? fix16_maximum : fix16_minimum;`

			`return result;`
			`}`

			`fix16_t fix16_ssub(fix16_t a, fix16_t b)`
			`{`
			`fix16_t result = fix16_sub(a, b);`

			`if (result == fix16_overflow)`
			`return (a >= 0) ? fix16_maximum : fix16_minimum;`

			`return result;`
			`}`
			`#endif`



			`/* 64-bit implementation for fix16_mul. Fastest version for e.g. ARM Cortex M3.`
			`* Performs a 32*32 -> 64bit multiplication. The middle 32 bits are the result,`
			`* bottom 16 bits are used for rounding, and upper 16 bits are used for overflow`
			`* detection.`
			`*/`

			`fix16_t fix16_mul(fix16_t inArg0, fix16_t inArg1)`
			`{`
			`int64_t product = (int64_t)inArg0 * inArg1;`

			`#ifndef FIXMATH_NO_OVERFLOW`
			`// The upper 17 bits should all be the same (the sign).`
			`uint32_t upper = (product >> 47);`
			`#endif`

			`if (product < 0)`
			`{`
			`#ifndef FIXMATH_NO_OVERFLOW`
			`if (~upper)`
			`return fix16_overflow;`
			`#endif`

			`#ifndef FIXMATH_NO_ROUNDING`
			`// This adjustment is required in order to round -1/2 correctly`
			`product--;`
			`#endif`
			`}`
			`else`
			`{`
			`#ifndef FIXMATH_NO_OVERFLOW`
			`if (upper)`
			`return fix16_overflow;`
			`#endif`
			`}`

			`#ifdef FIXMATH_NO_ROUNDING`
			`return product >> 16;`
			`#else`
			`fix16_t result = product >> 16;`
			`result += (product & 0x8000) >> 15;`

			`return result;`
			`#endif`
			`}`

			`#ifndef FIXMATH_NO_OVERFLOW`
			`/* Wrapper around fix16_mul to add saturating arithmetic. */`
			`fix16_t fix16_smul(fix16_t inArg0, fix16_t inArg1)`
			`{`
			`fix16_t result = fix16_mul(inArg0, inArg1);`

			`if (result == fix16_overflow)`
			`{`
			`if ((inArg0 >= 0) == (inArg1 >= 0))`
			`return fix16_maximum;`
			`else`
			`return fix16_minimum;`
			`}`

			`return result;`
			`}`
			`#endif`

			`/* 32-bit implementation of fix16_div. Fastest version for e.g. ARM Cortex M3.`
			`* Performs 32-bit divisions repeatedly to reduce the remainder. For this to`
			`* be efficient, the processor has to have 32-bit hardware division.`
			`*/`
			`#ifdef __GNUC__`
			`// Count leading zeros, using processor-specific instruction if available.`
			`#define clz(x) (__builtin_clzl(x) - (8 * sizeof(long) - 32))`
			`#else`
			`static uint8_t clz(uint32_t x)`
			`{`
			`uint8_t result = 0;`
			`if (x == 0) return 32;`
			`while (!(x & 0xF0000000)) { result += 4; x <<= 4; }`
			`while (!(x & 0x80000000)) { result += 1; x <<= 1; }`
			`return result;`
			`}`
			`#endif`

			`fix16_t fix16_div(fix16_t a, fix16_t b)`
			`{`
			`// This uses a hardware 32/32 bit division multiple times, until we have`
			`// computed all the bits in (a<<17)/b. Usually this takes 1-3 iterations.`

			`if (b == 0)`
			`return fix16_minimum;`

			`uint32_t remainder = (a >= 0) ? a : (-a);`
			`uint32_t divider = (b >= 0) ? b : (-b);`
			`uint32_t quotient = 0;`
			`int bit_pos = 17;`

			`// Kick-start the division a bit.`
			`// This improves speed in the worst-case scenarios where N and D are large`
			`// It gets a lower estimate for the result by N/(D >> 17 + 1).`
			`if (divider & 0xFFF00000)`
			`{`
			`uint32_t shifted_div = ((divider >> 17) + 1);`
			`quotient = remainder / shifted_div;`
			`remainder -= ((uint64_t)quotient * divider) >> 17;`
			`}`

			`// If the divider is divisible by 2^n, take advantage of it.`
			`while (!(divider & 0xF) && bit_pos >= 4)`
			`{`
			`divider >>= 4;`
			`bit_pos -= 4;`
			`}`

			`while (remainder && bit_pos >= 0)`
			`{`
			`// Shift remainder as much as we can without overflowing`
			`int shift = clz(remainder);`
			`if (shift > bit_pos) shift = bit_pos;`
			`remainder <<= shift;`
			`bit_pos -= shift;`

			`uint32_t div = remainder / divider;`
			`remainder = remainder % divider;`
			`quotient += div << bit_pos;`

			`#ifndef FIXMATH_NO_OVERFLOW`
			`if (div & ~(0xFFFFFFFF >> bit_pos))`
			`return fix16_overflow;`
			`#endif`

			`remainder <<= 1;`
			`bit_pos--;`
			`}`

			`#ifndef FIXMATH_NO_ROUNDING`
			`// Quotient is always positive so rounding is easy`
			`quotient++;`
			`#endif`

			`fix16_t result = quotient >> 1;`

			`// Figure out the sign of the result`
			`if ((a ^ b) & 0x80000000)`
			`{`
			`#ifndef FIXMATH_NO_OVERFLOW`
			`if (result == fix16_minimum)`
			`return fix16_overflow;`
			`#endif`

			`result = -result;`
			`}`

			`return result;`
			`}`

			`#ifndef FIXMATH_NO_OVERFLOW`
			`/* Wrapper around fix16_div to add saturating arithmetic. */`
			`fix16_t fix16_sdiv(fix16_t inArg0, fix16_t inArg1)`
			`{`
			`fix16_t result = fix16_div(inArg0, inArg1);`

			`if (result == fix16_overflow)`
			`{`
			`if ((inArg0 >= 0) == (inArg1 >= 0))`
			`return fix16_maximum;`
			`else`
			`return fix16_minimum;`
			`}`

			`return result;`
			`}`
			`#endif`

			`fix16_t fix16_mod(fix16_t x, fix16_t y) { return x %= y; }`


			`#ifndef FIXMATH_NO_64BIT`

			`fix16_t fix16_lerp8(fix16_t inArg0, fix16_t inArg1, uint8_t inFract)`
			`{`
			`int64_t tempOut = int64_mul_i32_i32(inArg0, ((1 << 8) - inFract));`
			`tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));`
			`tempOut = int64_shift(tempOut, -8);`
			`return (fix16_t)int64_lo(tempOut);`
			`}`

			`fix16_t fix16_lerp16(fix16_t inArg0, fix16_t inArg1, uint16_t inFract)`
			`{`
			`int64_t tempOut = int64_mul_i32_i32(inArg0, (((int32_t)1 << 16) - inFract));`
			`tempOut = int64_add(tempOut, int64_mul_i32_i32(inArg1, inFract));`
			`tempOut = int64_shift(tempOut, -16);`
			`return (fix16_t)int64_lo(tempOut);`
			`}`

			`fix16_t fix16_lerp32(fix16_t inArg0, fix16_t inArg1, uint32_t inFract)`
			`{`
			`int64_t tempOut;`
			`tempOut = ((int64_t)inArg0 * (0 - inFract));`
			`tempOut += ((int64_t)inArg1 * inFract);`
			`tempOut >>= 32;`
			`return (fix16_t)tempOut;`
			`}`
			`#endif`