mirror of
https://github.com/ZDoom/Raze.git
synced 2025-03-13 20:42:11 +00:00
Re-port our changes to libdivide.h and add new changes so that it works as we need.
git-svn-id: https://svn.eduke32.com/eduke32@6177 1a8010ca-5511-0410-912e-c29ae57300e0
This commit is contained in:
parent
284fdf1b63
commit
f8bdc5814e
5 changed files with 56 additions and 35 deletions
|
@ -1153,9 +1153,14 @@ static inline void maybe_grow_buffer(char ** const buffer, int32_t * const buffe
|
|||
|
||||
////////// Inlined external libraries //////////
|
||||
|
||||
#ifndef LIBDIVIDE_BODY
|
||||
# define LIBDIVIDE_HEADER_ONLY
|
||||
#endif
|
||||
#define LIBDIVIDE_C_HEADERS
|
||||
#define LIBDIVIDE_NONAMESPACE
|
||||
#define LIBDIVIDE_NOINLINE
|
||||
#include "libdivide.h"
|
||||
|
||||
|
||||
/* End dependence on compat.o object. */
|
||||
|
||||
|
||||
|
|
|
@ -5,6 +5,11 @@
|
|||
// licenses. You may use libdivide under the terms of
|
||||
// either of these. See LICENSE.txt for more details.
|
||||
|
||||
// Modified for EDuke32.
|
||||
|
||||
#ifndef libdivide_h_
|
||||
#define libdivide_h_
|
||||
|
||||
#if defined(_WIN32) || defined(WIN32)
|
||||
#define LIBDIVIDE_WINDOWS 1
|
||||
#endif
|
||||
|
@ -16,7 +21,7 @@
|
|||
#pragma warning(disable: 4146)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if defined __cplusplus && !defined LIBDIVIDE_C_HEADERS
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <cassert>
|
||||
|
@ -26,6 +31,18 @@
|
|||
#include <assert.h>
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_WIN64) || defined(_M_64)
|
||||
#define LIBDIVIDE_IS_X86_64 1
|
||||
#endif
|
||||
|
||||
#if defined(__i386__)
|
||||
#define LIBDIVIDE_IS_i386 1
|
||||
#endif
|
||||
|
||||
#if LIBDIVIDE_IS_X86_64 || defined __SSE2__ || (defined _M_IX86_FP && _M_IX86_FP == 2)
|
||||
#define LIBDIVIDE_USE_SSE2 1
|
||||
#endif
|
||||
|
||||
#if ! LIBDIVIDE_HAS_STDINT_TYPES && (! LIBDIVIDE_VC || _MSC_VER >= 1600)
|
||||
// Only Visual C++ 2010 and later include stdint.h
|
||||
#include <stdint.h>
|
||||
|
@ -57,14 +74,6 @@ typedef unsigned __int8 uint8_t;
|
|||
#define HAS_INT128_T 1
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_WIN64) || defined(_M_X64)
|
||||
#define LIBDIVIDE_IS_X86_64 1
|
||||
#endif
|
||||
|
||||
#if defined(__i386__)
|
||||
#define LIBDIVIDE_IS_i386 1
|
||||
#endif
|
||||
|
||||
#if __GNUC__ || __clang__
|
||||
#define LIBDIVIDE_GCC_STYLE_ASM 1
|
||||
#endif
|
||||
|
@ -89,7 +98,7 @@ typedef unsigned __int8 uint8_t;
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if defined __cplusplus && !defined LIBDIVIDE_NONAMESPACE
|
||||
// We place libdivide within the libdivide namespace, and that goes in an
|
||||
// anonymous namespace so that the functions are only visible to files that
|
||||
// #include this header and don't get external linkage. At least that's the
|
||||
|
@ -178,7 +187,7 @@ struct libdivide_s64_branchfree_t {
|
|||
};
|
||||
|
||||
#ifndef LIBDIVIDE_API
|
||||
#ifdef __cplusplus
|
||||
#if defined __cplusplus || defined LIBDIVIDE_NOINLINE
|
||||
// In C++, we don't want our public functions to be static, because
|
||||
// they are arguments to templates and static functions can't do that.
|
||||
// They get internal linkage through virtue of the anonymous namespace.
|
||||
|
@ -276,6 +285,8 @@ LIBDIVIDE_API __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const s
|
|||
|
||||
#endif
|
||||
|
||||
#ifndef LIBDIVIDE_HEADER_ONLY
|
||||
|
||||
//////// Internal Utility Functions
|
||||
|
||||
static inline uint32_t libdivide__mullhi_u32(uint32_t x, uint32_t y) {
|
||||
|
@ -337,7 +348,7 @@ static inline __m128i libdivide__u64_to_m128(uint64_t x) {
|
|||
#elif defined(__ICC)
|
||||
uint64_t __attribute__((aligned(16))) temp[2] = {x,x};
|
||||
return _mm_load_si128((const __m128i*)temp);
|
||||
#elif __clang__
|
||||
#elif __clang__ && (2 > __clang_major__ || (2 == __clang_major__ && 7 > __clang_minor__))
|
||||
// clang does not provide this intrinsic either
|
||||
return (__m128i){x, x};
|
||||
#else
|
||||
|
@ -546,7 +557,7 @@ static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v,
|
|||
rhat; // A remainder.
|
||||
int s; // Shift amount for norm.
|
||||
|
||||
if (u1 >= v) { // If overflow, set rem.
|
||||
if (EDUKE32_PREDICT_FALSE(u1 >= v)) { // If overflow, set rem.
|
||||
if (r != NULL) // to an impossible value,
|
||||
*r = (uint64_t) -1; // and return the largest
|
||||
return (uint64_t) -1; // possible quotient.
|
||||
|
@ -554,7 +565,7 @@ static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v,
|
|||
|
||||
// count leading zeros
|
||||
s = libdivide__count_leading_zeros64(v); // 0 <= s <= 63.
|
||||
if (s > 0) {
|
||||
if (EDUKE32_PREDICT_TRUE(s > 0)) {
|
||||
v = v << s; // Normalize divisor.
|
||||
un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
|
||||
un10 = u0 << s; // Shift dividend left.
|
||||
|
@ -703,8 +714,6 @@ static uint64_t libdivide_128_div_128_to_64(uint64_t u_hi, uint64_t u_lo, uint64
|
|||
#endif
|
||||
}
|
||||
|
||||
#ifndef LIBDIVIDE_HEADER_ONLY
|
||||
|
||||
////////// UINT32
|
||||
|
||||
static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) {
|
||||
|
@ -713,7 +722,7 @@ static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int
|
|||
|
||||
struct libdivide_u32_t result;
|
||||
const uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(d);
|
||||
if ((d & (d - 1)) == 0) {
|
||||
if (EDUKE32_PREDICT_FALSE((d & (d - 1)) == 0)) {
|
||||
// Power of 2
|
||||
if (! branchfree) {
|
||||
result.magic = 0;
|
||||
|
@ -771,7 +780,7 @@ struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) {
|
|||
|
||||
uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
if (more & LIBDIVIDE_U32_SHIFT_PATH) {
|
||||
if (EDUKE32_PREDICT_FALSE(more & LIBDIVIDE_U32_SHIFT_PATH)) {
|
||||
return numer >> (more & LIBDIVIDE_32_SHIFT_MASK);
|
||||
}
|
||||
else {
|
||||
|
@ -916,7 +925,7 @@ static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int
|
|||
|
||||
struct libdivide_u64_t result;
|
||||
const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(d);
|
||||
if ((d & (d - 1)) == 0) {
|
||||
if (EDUKE32_PREDICT_FALSE((d & (d - 1)) == 0)) {
|
||||
// Power of 2
|
||||
if (! branchfree) {
|
||||
result.magic = 0;
|
||||
|
@ -977,7 +986,7 @@ struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d)
|
|||
|
||||
uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) {
|
||||
uint8_t more = denom->more;
|
||||
if (more & LIBDIVIDE_U64_SHIFT_PATH) {
|
||||
if (EDUKE32_PREDICT_FALSE(more & LIBDIVIDE_U64_SHIFT_PATH)) {
|
||||
return numer >> (more & LIBDIVIDE_64_SHIFT_MASK);
|
||||
}
|
||||
else {
|
||||
|
@ -1145,7 +1154,7 @@ static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int b
|
|||
const uint32_t floor_log_2_d = 31 - libdivide__count_leading_zeros32(absD);
|
||||
// check if exactly one bit is set,
|
||||
// don't care if absD is 0 since that's divide by zero
|
||||
if ((absD & (absD - 1)) == 0) {
|
||||
if (EDUKE32_PREDICT_FALSE((absD & (absD - 1)) == 0)) {
|
||||
// Branchfree and normal paths are exactly the same
|
||||
result.magic = 0;
|
||||
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0) | LIBDIVIDE_S32_SHIFT_PATH;
|
||||
|
@ -1436,7 +1445,7 @@ static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int b
|
|||
const uint32_t floor_log_2_d = 63 - libdivide__count_leading_zeros64(absD);
|
||||
// check if exactly one bit is set,
|
||||
// don't care if absD is 0 since that's divide by zero
|
||||
if ((absD & (absD - 1)) == 0) {
|
||||
if (EDUKE32_PREDICT_FALSE((absD & (absD - 1)) == 0)) {
|
||||
// Branchfree and non-branchfree cases are the same
|
||||
result.magic = 0;
|
||||
result.more = floor_log_2_d | (d < 0 ? LIBDIVIDE_NEGATIVE_DIVISOR : 0);
|
||||
|
@ -1704,6 +1713,8 @@ __m128i libdivide_s64_branchfree_do_vector(__m128i numers, const struct libdivid
|
|||
|
||||
#endif
|
||||
|
||||
#endif // LIBDIVIDE_HEADER_ONLY
|
||||
|
||||
/////////// C++ stuff
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -1734,11 +1745,19 @@ namespace libdivide_internal {
|
|||
|
||||
// Some bogus unswitch functions for unsigned types so the same
|
||||
// (presumably templated) code can work for both signed and unsigned.
|
||||
uint32_t crash_u32(uint32_t, const libdivide_u32_t *);
|
||||
uint64_t crash_u64(uint64_t, const libdivide_u64_t *);
|
||||
#if LIBDIVIDE_USE_SSE2
|
||||
__m128i crash_u32_vector(__m128i, const libdivide_u32_t *);
|
||||
__m128i crash_u64_vector(__m128i, const libdivide_u64_t *);
|
||||
#endif
|
||||
#ifndef LIBDIVIDE_HEADER_ONLY
|
||||
uint32_t crash_u32(uint32_t, const libdivide_u32_t *) { abort(); return *(uint32_t *)NULL; }
|
||||
uint64_t crash_u64(uint64_t, const libdivide_u64_t *) { abort(); return *(uint64_t *)NULL; }
|
||||
#if LIBDIVIDE_USE_SSE2
|
||||
__m128i crash_u32_vector(__m128i, const libdivide_u32_t *) { abort(); return *(__m128i *)NULL; }
|
||||
__m128i crash_u64_vector(__m128i, const libdivide_u64_t *) { abort(); return *(__m128i *)NULL; }
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Base divider, which provides storage for the actual divider
|
||||
|
@ -1927,11 +1946,11 @@ __m128i operator/(__m128i numer, const divider<int_type, ALGO> & denom) {
|
|||
}
|
||||
#endif
|
||||
|
||||
#endif // __cplusplus
|
||||
#endif
|
||||
|
||||
#endif // LIBDIVIDE_HEADER_ONLY
|
||||
|
||||
#ifdef __cplusplus
|
||||
#if defined __cplusplus && !defined LIBDIVIDE_NONAMESPACE
|
||||
} // close namespace libdivide
|
||||
} // close anonymous namespace
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -39,8 +39,8 @@ extern int32_t reciptable[2048], fpuasm;
|
|||
#define LIBDIVIDE_ALWAYS
|
||||
#define DIVTABLESIZE 16384
|
||||
|
||||
extern libdivide_s64pad_t divtable64[DIVTABLESIZE];
|
||||
extern libdivide_s32pad_t divtable32[DIVTABLESIZE];
|
||||
extern libdivide_s64_t divtable64[DIVTABLESIZE];
|
||||
extern libdivide_s32_t divtable32[DIVTABLESIZE];
|
||||
extern void initdivtables(void);
|
||||
|
||||
#if defined(__arm__) || defined(LIBDIVIDE_ALWAYS)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
* Playing-field leveller for Build
|
||||
*/
|
||||
|
||||
#define LIBDIVIDE_BODY
|
||||
#include "compat.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
@ -646,7 +647,3 @@ int access(const char *pathname, int mode)
|
|||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#define LIBDIVIDE_BODY
|
||||
#include "libdivide.h"
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
#include "compat.h"
|
||||
#include "pragmas.h"
|
||||
|
||||
libdivide_s64pad_t divtable64[DIVTABLESIZE];
|
||||
libdivide_s32pad_t divtable32[DIVTABLESIZE];
|
||||
libdivide_s64_t divtable64[DIVTABLESIZE];
|
||||
libdivide_s32_t divtable32[DIVTABLESIZE];
|
||||
|
||||
void initdivtables(void)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue