mirror of
https://github.com/ZDoom/raze-gles.git
synced 2024-12-25 11:10:47 +00:00
Merge our changes to libdivide back in
git-svn-id: https://svn.eduke32.com/eduke32@6845 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
bd2019435a
commit
ef9845a947
1 changed file with 42 additions and 19 deletions
|
@ -5,6 +5,8 @@
|
||||||
// You may use libdivide under the terms of either of these.
|
// You may use libdivide under the terms of either of these.
|
||||||
// See LICENSE.txt for more details.
|
// See LICENSE.txt for more details.
|
||||||
|
|
||||||
|
// Modified for EDuke32.
|
||||||
|
|
||||||
#ifndef LIBDIVIDE_H
|
#ifndef LIBDIVIDE_H
|
||||||
#define LIBDIVIDE_H
|
#define LIBDIVIDE_H
|
||||||
|
|
||||||
|
@ -15,7 +17,7 @@
|
||||||
#define LIBDIVIDE_VC
|
#define LIBDIVIDE_VC
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#if defined __cplusplus && !defined LIBDIVIDE_C_HEADERS
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#else
|
#else
|
||||||
|
@ -23,6 +25,18 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__x86_64__) || defined(_WIN64) || defined(_M_X64)
|
||||||
|
#define LIBDIVIDE_IS_X86_64 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__i386__)
|
||||||
|
#define LIBDIVIDE_IS_i386 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if LIBDIVIDE_IS_X86_64 || defined __SSE2__ || (defined _M_IX86_FP && _M_IX86_FP == 2)
|
||||||
|
#define LIBDIVIDE_USE_SSE2 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#if defined(LIBDIVIDE_USE_SSE2)
|
#if defined(LIBDIVIDE_USE_SSE2)
|
||||||
|
@ -41,14 +55,6 @@
|
||||||
#define HAS_INT128_T
|
#define HAS_INT128_T
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__x86_64__) || defined(_WIN64) || defined(_M_X64)
|
|
||||||
#define LIBDIVIDE_IS_X86_64
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__i386__)
|
|
||||||
#define LIBDIVIDE_IS_i386
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
#define LIBDIVIDE_GCC_STYLE_ASM
|
#define LIBDIVIDE_GCC_STYLE_ASM
|
||||||
#endif
|
#endif
|
||||||
|
@ -87,7 +93,7 @@
|
||||||
#include <smmintrin.h>
|
#include <smmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#if defined __cplusplus && !defined LIBDIVIDE_NONAMESPACE
|
||||||
// We place libdivide within the libdivide namespace, and that goes in an
|
// We place libdivide within the libdivide namespace, and that goes in an
|
||||||
// anonymous namespace so that the functions are only visible to files that
|
// anonymous namespace so that the functions are only visible to files that
|
||||||
// #include this header and don't get external linkage. At least that's the
|
// #include this header and don't get external linkage. At least that's the
|
||||||
|
@ -184,7 +190,7 @@ struct libdivide_s64_branchfree_t {
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
#ifndef LIBDIVIDE_API
|
#ifndef LIBDIVIDE_API
|
||||||
#ifdef __cplusplus
|
#if defined __cplusplus || defined LIBDIVIDE_NOINLINE
|
||||||
// In C++, we don't want our public functions to be static, because
|
// In C++, we don't want our public functions to be static, because
|
||||||
// they are arguments to templates and static functions can't do that.
|
// they are arguments to templates and static functions can't do that.
|
||||||
// They get internal linkage through virtue of the anonymous namespace.
|
// They get internal linkage through virtue of the anonymous namespace.
|
||||||
|
@ -283,6 +289,8 @@ LIBDIVIDE_API __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const s
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef LIBDIVIDE_HEADER_ONLY
|
||||||
|
|
||||||
//////// Internal Utility Functions
|
//////// Internal Utility Functions
|
||||||
|
|
||||||
static inline uint32_t libdivide__mullhi_u32(uint32_t x, uint32_t y) {
|
static inline uint32_t libdivide__mullhi_u32(uint32_t x, uint32_t y) {
|
||||||
|
@ -571,7 +579,7 @@ static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v,
|
||||||
|
|
||||||
// If overflow, set rem. to an impossible value,
|
// If overflow, set rem. to an impossible value,
|
||||||
// and return the largest possible quotient
|
// and return the largest possible quotient
|
||||||
if (u1 >= v) {
|
if (EDUKE32_PREDICT_FALSE(u1 >= v)) {
|
||||||
if (r != NULL)
|
if (r != NULL)
|
||||||
*r = (uint64_t) -1;
|
*r = (uint64_t) -1;
|
||||||
return (uint64_t) -1;
|
return (uint64_t) -1;
|
||||||
|
@ -579,7 +587,7 @@ static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v,
|
||||||
|
|
||||||
// count leading zeros
|
// count leading zeros
|
||||||
s = libdivide__count_leading_zeros64(v);
|
s = libdivide__count_leading_zeros64(v);
|
||||||
if (s > 0) {
|
if (EDUKE32_PREDICT_TRUE(s > 0)) {
|
||||||
// Normalize divisor
|
// Normalize divisor
|
||||||
v = v << s;
|
v = v << s;
|
||||||
un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
|
un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
|
||||||
|
@ -752,7 +760,7 @@ static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int
|
||||||
|
|
||||||
struct libdivide_u32_t result;
|
struct libdivide_u32_t result;
|
||||||
uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(d);
|
uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(d);
|
||||||
if ((d & (d - 1)) == 0) {
|
if (EDUKE32_PREDICT_FALSE((d & (d - 1)) == 0)) {
|
||||||
// Power of 2
|
// Power of 2
|
||||||
if (! branchfree) {
|
if (! branchfree) {
|
||||||
result.magic = 0;
|
result.magic = 0;
|
||||||
|
@ -813,7 +821,7 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
|
||||||
|
|
||||||
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
||||||
uint8_t more = denom->more;
|
uint8_t more = denom->more;
|
||||||
if (more & LIBDIVIDE_U32_SHIFT_PATH) {
|
if (EDUKE32_PREDICT_FALSE(more & LIBDIVIDE_U32_SHIFT_PATH)) {
|
||||||
return numer >> (more & LIBDIVIDE_32_SHIFT_MASK);
|
return numer >> (more & LIBDIVIDE_32_SHIFT_MASK);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -960,7 +968,7 @@ static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int
|
||||||
|
|
||||||
struct libdivide_u64_t result;
|
struct libdivide_u64_t result;
|
||||||
uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d);
|
uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d);
|
||||||
if ((d & (d - 1)) == 0) {
|
if (EDUKE32_PREDICT_FALSE((d & (d - 1)) == 0)) {
|
||||||
// Power of 2
|
// Power of 2
|
||||||
if (! branchfree) {
|
if (! branchfree) {
|
||||||
result.magic = 0;
|
result.magic = 0;
|
||||||
|
@ -1023,7 +1031,7 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) {
|
||||||
|
|
||||||
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
||||||
uint8_t more = denom->more;
|
uint8_t more = denom->more;
|
||||||
if (more & LIBDIVIDE_U64_SHIFT_PATH) {
|
if (EDUKE32_PREDICT_FALSE(more & LIBDIVIDE_U64_SHIFT_PATH)) {
|
||||||
return numer >> (more & LIBDIVIDE_64_SHIFT_MASK);
|
return numer >> (more & LIBDIVIDE_64_SHIFT_MASK);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -1195,7 +1203,7 @@ static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int b
|
||||||
uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(absD);
|
uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(absD);
|
||||||
// check if exactly one bit is set,
|
// check if exactly one bit is set,
|
||||||
// don't care if absD is 0 since that's divide by zero
|
// don't care if absD is 0 since that's divide by zero
|
||||||
if ((absD & (absD - 1)) == 0) {
|
if (EDUKE32_PREDICT_FALSE((absD & (absD - 1)) == 0)) {
|
||||||
// Branchfree and normal paths are exactly the same
|
// Branchfree and normal paths are exactly the same
|
||||||
result.magic = 0;
|
result.magic = 0;
|
||||||
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0) | LIBDIVIDE_S32_SHIFT_PATH;
|
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0) | LIBDIVIDE_S32_SHIFT_PATH;
|
||||||
|
@ -1499,7 +1507,7 @@ static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int b
|
||||||
uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(absD);
|
uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(absD);
|
||||||
// check if exactly one bit is set,
|
// check if exactly one bit is set,
|
||||||
// don't care if absD is 0 since that's divide by zero
|
// don't care if absD is 0 since that's divide by zero
|
||||||
if ((absD & (absD - 1)) == 0) {
|
if (EDUKE32_PREDICT_FALSE((absD & (absD - 1)) == 0)) {
|
||||||
// Branchfree and non-branchfree cases are the same
|
// Branchfree and non-branchfree cases are the same
|
||||||
result.magic = 0;
|
result.magic = 0;
|
||||||
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
|
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
|
||||||
|
@ -1778,6 +1786,8 @@ __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivid
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif // LIBDIVIDE_HEADER_ONLY
|
||||||
|
|
||||||
/////////// C++ stuff
|
/////////// C++ stuff
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -1877,12 +1887,23 @@ namespace libdivide_internal {
|
||||||
// than signed i.e. alg3 and alg4 are not defined for unsigned. In
|
// than signed i.e. alg3 and alg4 are not defined for unsigned. In
|
||||||
// order to make templates compile we need to define unsigned alg3 and
|
// order to make templates compile we need to define unsigned alg3 and
|
||||||
// alg4 as crash functions.
|
// alg4 as crash functions.
|
||||||
|
|
||||||
|
uint32_t libdivide_u32_crash(uint32_t, const libdivide_u32_t *);
|
||||||
|
uint64_t libdivide_u64_crash(uint64_t, const libdivide_u64_t *);
|
||||||
|
|
||||||
|
#if defined(LIBDIVIDE_USE_SSE2)
|
||||||
|
__m128i libdivide_u32_crash_vector(__m128i, const libdivide_u32_t *);
|
||||||
|
__m128i libdivide_u64_crash_vector(__m128i, const libdivide_u64_t *);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef LIBDIVIDE_HEADER_ONLY
|
||||||
uint32_t libdivide_u32_crash(uint32_t, const libdivide_u32_t *) { exit(-1); }
|
uint32_t libdivide_u32_crash(uint32_t, const libdivide_u32_t *) { exit(-1); }
|
||||||
uint64_t libdivide_u64_crash(uint64_t, const libdivide_u64_t *) { exit(-1); }
|
uint64_t libdivide_u64_crash(uint64_t, const libdivide_u64_t *) { exit(-1); }
|
||||||
|
|
||||||
#if defined(LIBDIVIDE_USE_SSE2)
|
#if defined(LIBDIVIDE_USE_SSE2)
|
||||||
__m128i libdivide_u32_crash_vector(__m128i, const libdivide_u32_t *) { exit(-1); }
|
__m128i libdivide_u32_crash_vector(__m128i, const libdivide_u32_t *) { exit(-1); }
|
||||||
__m128i libdivide_u64_crash_vector(__m128i, const libdivide_u64_t *) { exit(-1); }
|
__m128i libdivide_u64_crash_vector(__m128i, const libdivide_u64_t *) { exit(-1); }
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<typename T, int ALGO> struct dispatcher { };
|
template<typename T, int ALGO> struct dispatcher { };
|
||||||
|
@ -2048,8 +2069,10 @@ __m128i operator/=(__m128i& numer, const divider<int_type, ALGO>& denom) {
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined __cplusplus && !defined LIBDIVIDE_NONAMESPACE
|
||||||
} // namespace libdivide
|
} // namespace libdivide
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // __cplusplus
|
#endif // __cplusplus
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue