raze-gles/source/build/src/pragmas.cpp

// Function-wrapped Watcom pragmas
// by Jonathon Fowler (jf@jonof.id.au)
//
// These functions represent some of the longer-winded pragmas
// from the original pragmas.h wrapped into functions for easier
// use since many jumps and whatnot make it harder to write macro-
// inline versions. I'll eventually convert these to macro-inline
// equivalents.		--Jonathon

#include "compat.h"
#include "pragmas.h"

// Tables of libdivide divider objects, indexed by divisor value.
// initdivtables() fills entries 1 .. DIVTABLESIZE-1 (64-bit and 32-bit signed
// variants respectively); entry 0 is never written by it.
libdivide::libdivide_s64_t divtable64[DIVTABLESIZE];
libdivide::libdivide_s32_t divtable32[DIVTABLESIZE];

// Fills divtable32[] and divtable64[] with the libdivide dividers generated
// for every divisor from 1 up to (but not including) DIVTABLESIZE.
// Slot 0 of both tables is left untouched.
void initdivtables(void)
{
    int divisor = 1;

    while (divisor < DIVTABLESIZE)
    {
        divtable32[divisor] = libdivide::libdivide_s32_gen(divisor);
        divtable64[divisor] = libdivide::libdivide_s64_gen(divisor);
        ++divisor;
    }
}

// Out-of-line entry point that forwards to divideu32() and returns its result.
uint32_t divideu32_noinline(uint32_t n, uint32_t d)
{
    uint32_t const quotient = divideu32(n, d);
    return quotient;
}
// Out-of-line entry point that forwards to tabledivide32() and returns its result.
int32_t tabledivide32_noinline(int32_t n, int32_t d)
{
    int32_t const quotient = tabledivide32(n, d);
    return quotient;
}
// Out-of-line entry point that forwards to tabledivide64() and returns its result.
int64_t tabledivide64_noinline(int64_t n, int64_t d)
{
    int64_t const quotient = tabledivide64(n, d);
    return quotient;
}


//
// Generic C version
//

#ifndef pragmas_have_qinterpolatedown16
// Writes `num` 32-bit values to the buffer at `bufptr`.
// Each element receives val>>16 (val with its low 16 bits shifted out), after
// which val is advanced by `add`.
//
//   bufptr - address of an int32_t array with room for `num` elements
//   num    - number of elements to write; zero or negative writes nothing
//   val    - starting accumulator value
//   add    - per-element increment applied to the accumulator
void qinterpolatedown16(intptr_t bufptr, int32_t num, int32_t val, int32_t add)
{
    auto lptr = (int32_t *)bufptr;

    // The counter stays signed: converting a negative num to size_t would
    // turn it into an enormous count and run far past the buffer.
    for (int32_t i = 0; i < num; ++i)
    {
        lptr[i] = val>>16;
        // Step in unsigned arithmetic so the accumulator wraps around instead
        // of hitting signed-overflow undefined behaviour.
        val = (int32_t)((uint32_t)val + (uint32_t)add);
    }
}

// Writes `num` 16-bit values to the buffer at `bufptr`.
// Each element receives val>>16 narrowed to int16_t, after which val is
// advanced by `add`.
//
//   bufptr - address of an int16_t array with room for `num` elements
//   num    - number of elements to write; zero or negative writes nothing
//   val    - starting accumulator value
//   add    - per-element increment applied to the accumulator
void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t val, int32_t add)
{
    auto sptr = (int16_t *)bufptr;

    // The counter stays signed: converting a negative num to size_t would
    // turn it into an enormous count and run far past the buffer.
    for (int32_t i = 0; i < num; ++i)
    {
        sptr[i] = (int16_t)(val>>16);
        // Step in unsigned arithmetic so the accumulator wraps around instead
        // of hitting signed-overflow undefined behaviour.
        val = (int32_t)((uint32_t)val + (uint32_t)add);
    }
}
#endif

#ifndef pragmas_have_clearbuf
// Stores the 32-bit value `a` into `c` consecutive int32_t slots starting at `d`.
//
//   d - destination buffer, accessed as an array of int32_t
//   c - number of 32-bit elements to write; zero or negative writes nothing
//   a - value stored into every element
void clearbuf(void *d, int32_t c, int32_t a)
{
    auto p = (int32_t *)d;

    // Test "> 0" (as swapbuf4 and clearbufbyte do) so a negative count is a
    // no-op instead of counting down through the whole negative range.
    for (; c > 0; --c)
        *p++ = a;
}
#endif

#ifndef pragmas_have_copybuf
// Copies `c` 32-bit elements from `s` to `d`, one element at a time in
// ascending address order.
//
//   s - source buffer, read as an array of int32_t
//   d - destination buffer, written as an array of int32_t
//   c - number of 32-bit elements to copy; zero or negative copies nothing
void copybuf(const void *s, void *d, int32_t c)
{
    auto p = (const int32_t *) s;
    auto q = (int32_t *) d;

    // Test "> 0" (as swapbuf4 and clearbufbyte do) so a negative count is a
    // no-op instead of counting down through the whole negative range.
    for (; c > 0; --c)
        *q++ = *p++;
}
#endif

#ifndef pragmas_have_swaps
// Exchanges the first `c` 32-bit elements of buffers `a` and `b`.
// A count of zero or less leaves both buffers unchanged.
void swapbuf4(void *a, void *b, int32_t c)
{
    auto first  = (int32_t *) a;
    auto second = (int32_t *) b;

    for (int32_t i = 0; i < c; ++i)
    {
        // Read both sides before writing either one.
        const int fromSecond = second[i];
        const int fromFirst  = first[i];

        second[i] = fromFirst;
        first[i]  = fromSecond;
    }
}
#endif

#ifndef pragmas_have_clearbufbyte
// Fills `c` bytes at `D` with the bytes of the 32-bit pattern `a`, repeating
// every four bytes: output byte i receives bits (i&3)*8 .. (i&3)*8+7 of `a`,
// so the least significant byte of `a` is written first.
// A count of zero or less writes nothing.
void clearbufbyte(void *D, int32_t c, int32_t a)
{
    auto out = (char *)D;
    const uint32_t pattern = (uint32_t)a;

    for (int32_t i = 0; i < c; ++i)
    {
        // Pick the byte of the pattern that corresponds to this position.
        const int shift = (i & 3) << 3;
        out[i] = (uint8_t)(pattern >> shift);
    }
}
#endif

#ifndef pragmas_have_copybufbyte
// Copies `c` bytes from `s` to `d`, one byte at a time in ascending address
// order.
//
//   s - source buffer
//   d - destination buffer
//   c - number of bytes to copy; zero or negative copies nothing
void copybufbyte(const void *s, void *d, int32_t c)
{
    auto src = (const char *)s;
    auto dst = (char *)d;

    // Test "> 0" (as swapbuf4 and clearbufbyte do) so a negative count is a
    // no-op instead of counting down through the whole negative range.
    for (; c > 0; --c)
        *dst++ = *src++;
}
#endif


// copybufreverse() is a special case: use the assembly version for GCC on x86
// *and* x86_64, and the C version otherwise.
// XXX: we don't honor NOASM in the x86_64 case.

#if defined(__GNUC__) && defined(__x86_64__)
// NOTE: Almost CODEDUP from x86 GCC assembly version, except that
// - %%esi -> %%rsi
// - %%edi -> %%rdi
// - (dec,inc,sub,add)l suffix removed where necessary
// x86_64 GCC inline-assembly implementation of copybufreverse().
//
// Copies c bytes. The source is read downwards, starting at the byte S points
// to, while the destination is written upwards starting at D, so the bytes
// land in D in the reverse of their order in memory at the source.
//
// The count is consumed in three stages:
//  - bit 0 of c selects a single one-byte copy,
//  - bit 1 of c selects a single two-byte copy (bytes swapped with rorw),
//  - the remaining c>>2 is the number of four-byte groups copied in the loop
//    (bytes swapped with bswapl).
//
// NOTE(review): shrl is a logical shift, so a negative c looks like it would
// be treated as a very large count here -- confirm callers never pass one.
void copybufreverse(const void *S, void *D, int32_t c)
{
    __asm__ __volatile__(
        "shrl $1, %%ecx\n\t"		// carry = bit 0 of the count
        "jnc 0f\n\t"		// jnc skipit1
        "movb (%%rsi), %%al\n\t"	// odd byte: load from source...
        "dec %%rsi\n\t"		// ...step the source down...
        "movb %%al, (%%rdi)\n\t"	// ...store to destination...
        "inc %%rdi\n\t"		// ...step the destination up
        "0:\n\t"		// skipit1:
        "shrl $1, %%ecx\n\t"		// carry = bit 1 of the original count
        "jnc 1f\n\t"		// jnc skipit2
        "movw -1(%%rsi), %%ax\n\t"	// load the two bytes ending at the source pointer
        "sub $2, %%rsi\n\t"
        "rorw $8, %%ax\n\t"		// swap the two bytes
        "movw %%ax, (%%rdi)\n\t"
        "add $2, %%rdi\n\t"
        "1:\n\t"		// skipit2
        "testl %%ecx, %%ecx\n\t"	// any four-byte groups left?
        "jz 3f\n\t"		// jz endloop
        "2:\n\t"		// begloop
        "movl -3(%%rsi), %%eax\n\t"	// load the four bytes ending at the source pointer
        "sub $4, %%rsi\n\t"
        "bswapl %%eax\n\t"		// reverse the four bytes
        "movl %%eax, (%%rdi)\n\t"
        "add $4, %%rdi\n\t"
        "decl %%ecx\n\t"
        "jnz 2b\n\t"		// jnz begloop
        "3:"
    // Read-write operands: S in the "S" register (rsi), D in "D" (rdi), c in "c" (ecx).
    : "+S"(S), "+D"(D), "+c"(c) :
            // Clobbers: eax is used as scratch, memory is written, flags change.
            : "eax", "memory", "cc"
        );
}
#elif !defined pragmas_have_copybufreverse
// Portable implementation of copybufreverse().
//
// Copies `c` bytes: the source is read downwards starting at the byte `s`
// points to, while the destination is written upwards starting at `d`, so
// d[i] receives the byte located i bytes before `s`.
//
//   s - address of the first byte to read (the highest source address used)
//   d - destination buffer
//   c - number of bytes to copy; zero or negative copies nothing
void copybufreverse(const void *s, void *d, int32_t c)
{
    auto src = (const char *)s;
    auto dst = (char *)d;

    // Index from the fixed base pointers rather than stepping src down: no
    // pointer below the last byte read is ever formed, and a negative count
    // becomes a no-op.
    for (int32_t i = 0; i < c; ++i)
        dst[i] = src[-i];
}
#endif