// Function-wrapped Watcom pragmas // by Jonathon Fowler (jf@jonof.id.au) // // These functions represent some of the more longer-winded pragmas // from the original pragmas.h wrapped into functions for easier // use since many jumps and whatnot make it harder to write macro- // inline versions. I'll eventually convert these to macro-inline // equivalents. --Jonathon //#include "pragmas.h" #include "compat.h" int32_t dmval; #if defined(__GNUC__) && defined(GEKKO) // naked function (no prolog/epilog) // FIXME: this function produces unused parameter warnings and a missing return warning int32_t scale(int32_t a, int32_t d, int32_t c) { // return ((int64_t)a * d) / c; __asm__ __volatile__ ( " mullw 6, 3, 4\n" " mulhw 4, 3, 4\n" " mr 3, 6\n" " srawi. 0, 5, 31\n" " cmpwi cr1, 4, 0\n" " crxor 7, 0, 4\n" " xor 5, 0, 5\n" " subf. 5, 0, 5\n" " beq DivByZero\n" " bge cr1, Div64Common\n" " subfic 3, 3, 0\n" " subfze 4, 4\n" "Div64Common:\n" " cmplw 4, 5\n" " cntlzw 6, 5\n" " xor 4, 4, 3\n" " slw 5, 5, 6\n" " rotlw 4, 4, 6\n" " slw 3, 3, 6\n" " li 7, 2\n" " xor 4, 4, 3\n" " bge DivOverflow\n" " mtctr 7\n" "Div64Compute:\n" " srwi 6, 5, 16\n" " divwu 7, 4, 6\n" " mullw 6, 7, 6\n" " subf 4, 6, 4\n" " slwi 4, 4, 16\n" " inslwi 4, 3, 16, 16\n" " slwi 3, 3, 16\n" " clrlwi 6, 5, 16\n" " mullw 6, 7, 6\n" " subfc 4, 6, 4\n" " subfe. 6, 6, 6\n" " add 3, 3, 7\n" " bge Div64Done\n" "Div64Correct:\n" " addc 4, 4, 5\n" " addze. 6, 6\n" " subi 3, 3, 1\n" " blt Div64Correct\n" "Div64Done:\n" " bdnz Div64Compute\n" " cmpwi 3, 0\n" " bso cr1, Div64QuotientNeg\n" " blt DivOverflow\n" " blr\n" "Div64QuotientNeg:\n" " neg. 3, 3\n" " blelr\n" "DivOverflow:\n" " cror 4, 7, 7\n" "DivByZero:\n" " lis 3, 0x8000\n" " bltlr cr1\n" " subi 3, 3, 1\n" " blr\n" ); } void clearbufbyte(void *d, int32_t c, int32_t a) { if (a==0) { uint8_t *dd = (uint8_t*)d; int32_t align = (32 - (int32_t)d) & 31; if (align && c >= align) { uint32_t izero = 0; double fzero = 0; c -= align; if (align&1) { *dd = izero; dd += 1; } if (align&2) { *(uint16_t*)dd = izero; dd += 2; } if (align&4) { *(uint32_t*)dd = izero; dd += 4; } if (align&8) { *(double*)dd = fzero; dd += 8; } if (align&16) { *(double*)dd = fzero; *(double*)(dd+8) = fzero; dd += 16; } } align = c >> 5; while (align) { __asm__ ( " dcbz 0, %0\n" " addi %0, %0, 32\n" : "+r"(dd) : : "memory" ); align--; } if ((c &= 31)) { while (c--) { *dd++ = 0; } } return; } __asm__ __volatile__ ( " add %1, %1, %2\n" " neg. %2, %2\n" " beq 2f\n" "1:\n" " stbx %0, %1, %2\n" " addic. %2, %2, 1\n" " rotrwi %0, %0, 8\n" " bne 1b\n" "2:\n" : "+r"(a), "+b"(d), "+r"(c) : : "cc", "xer", "memory" ); } #elif defined(__GNUC__) && defined(__i386__) && !defined(NOASM) // NOASM // // GCC Inline Assembler version // #define ASM __asm__ __volatile__ int32_t boundmulscale(int32_t a, int32_t b, int32_t c) { ASM( "imull %%ebx\n\t" "movl %%edx, %%ebx\n\t" // mov ebx, edx "shrdl %%cl, %%edx, %%eax\n\t" // mov eax, edx, cl "sarl %%cl, %%edx\n\t" // sar edx, cl "xorl %%eax, %%edx\n\t" // xor edx, eax "js 0f\n\t" // js checkit "xorl %%eax, %%edx\n\t" // xor edx, eax "jz 1f\n\t" // js skipboundit "cmpl $0xffffffff, %%edx\n\t" // cmp edx, 0xffffffff "je 1f\n\t" // je skipboundit "0:\n\t" // checkit: "movl %%ebx, %%eax\n\t" // mov eax, ebx "sarl $31, %%eax\n\t" // sar eax, 31 "xorl $0x7fffffff, %%eax\n\t" // xor eax, 0x7fffffff "1:" // skipboundit: : "+a"(a), "+b"(b), "+c"(c) // input eax ebx ecx : : "edx", "cc" ); return a; } void clearbufbyte(void *D, int32_t c, int32_t a) { ASM( "cmpl $4, %%ecx\n\t" "jae 1f\n\t" "testb $1, %%cl\n\t" "jz 0f\n\t" // jz preskip "stosb\n\t" "0:\n\t" // preskip: "shrl $1, %%ecx\n\t" "rep\n\t" "stosw\n\t" "jmp 5f\n\t" // jmp endit "1:\n\t" // intcopy: "testl $1, %%edi\n\t" "jz 2f\n\t" // jz skip1 "stosb\n\t" "decl %%ecx\n\t" "2:\n\t" // skip1: "testl $2, %%edi\n\t" "jz 3f\n\t" // jz skip2 "stosw\n\t" "subl $2, %%ecx\n\t" "3:\n\t" // skip2: "movl %%ecx, %%ebx\n\t" "shrl $2, %%ecx\n\t" "rep\n\t" "stosl\n\t" "testb $2, %%bl\n\t" "jz 4f\n\t" // jz skip3 "stosw\n\t" "4:\n\t" // skip3: "testb $1, %%bl\n\t" "jz 5f\n\t" // jz endit "stosb\n\t" "5:" // endit : "+D"(D), "+c"(c), "+a"(a) : : "ebx", "memory", "cc" ); } void copybufbyte(const void *S, void *D, int32_t c) { ASM( "cmpl $4, %%ecx\n\t" // cmp ecx, 4 "jae 1f\n\t" "testb $1, %%cl\n\t" // test cl, 1 "jz 0f\n\t" "movsb\n\t" "0:\n\t" // preskip: "shrl $1, %%ecx\n\t" // shr ecx, 1 "rep\n\t" "movsw\n\t" "jmp 5f\n\t" "1:\n\t" // intcopy: "testl $1, %%edi\n\t" // test edi, 1 "jz 2f\n\t" "movsb\n\t" "decl %%ecx\n\t" "2:\n\t" // skip1: "testl $2, %%edi\n\t" // test edi, 2 "jz 3f\n\t" "movsw\n\t" "subl $2, %%ecx\n\t" // sub ecx, 2 "3:\n\t" // skip2: "movl %%ecx, %%ebx\n\t" // mov ebx, ecx "shrl $2, %%ecx\n\t" // shr ecx ,2 "rep\n\t" "movsl\n\t" "testb $2, %%bl\n\t" // test bl, 2 "jz 4f\n\t" "movsw\n\t" "4:\n\t" // skip3: "testb $1, %%bl\n\t" // test bl, 1 "jz 5f\n\t" "movsb\n\t" "5:" // endit: : "+c"(c), "+S"(S), "+D"(D) : : "ebx", "memory", "cc" ); } void copybufreverse(const void *S, void *D, int32_t c) { ASM( "shrl $1, %%ecx\n\t" "jnc 0f\n\t" // jnc skipit1 "movb (%%esi), %%al\n\t" "decl %%esi\n\t" "movb %%al, (%%edi)\n\t" "incl %%edi\n\t" "0:\n\t" // skipit1: "shrl $1, %%ecx\n\t" "jnc 1f\n\t" // jnc skipit2 "movw -1(%%esi), %%ax\n\t" "subl $2, %%esi\n\t" "rorw $8, %%ax\n\t" "movw %%ax, (%%edi)\n\t" "addl $2, %%edi\n\t" "1:\n\t" // skipit2 "testl %%ecx, %%ecx\n\t" "jz 3f\n\t" // jz endloop "2:\n\t" // begloop "movl -3(%%esi), %%eax\n\t" "subl $4, %%esi\n\t" "bswapl %%eax\n\t" "movl %%eax, (%%edi)\n\t" "addl $4, %%edi\n\t" "decl %%ecx\n\t" "jnz 2b\n\t" // jnz begloop "3:" : "+S"(S), "+D"(D), "+c"(c) : : "eax", "memory", "cc" ); } #elif defined(_MSC_VER) && !defined(NOASM) // __GNUC__ && __i386__ // // Microsoft C Inline Assembler version // #else // _MSC_VER // // Generic C version // void qinterpolatedown16(intptr_t bufptr, int32_t num, int32_t val, int32_t add) { // gee, I wonder who could have provided this... int32_t i, *lptr = (int32_t *)bufptr; for (i=0; i>16); val += add; } } void qinterpolatedown16short(intptr_t bufptr, int32_t num, int32_t val, int32_t add) { // ...maybe the same person who provided this too? int32_t i; int16_t *sptr = (int16_t *)bufptr; for (i=0; i>16); val += add; } } void clearbuf(void *d, int32_t c, int32_t a) { int32_t *p = (int32_t *)d; while ((c--) > 0) *(p++) = a; } void copybuf(const void *s, void *d, int32_t c) { const int32_t *p = (const int32_t *)s; int32_t *q = (int32_t *)d; while ((c--) > 0) *(q++) = *(p++); } void swapbuf4(void *a, void *b, int32_t c) { int32_t *p = (int32_t *)a, *q = (int32_t *)b; int32_t x, y; while ((c--) > 0) { x = *q; y = *p; *(q++) = y; *(p++) = x; } } void clearbufbyte(void *D, int32_t c, int32_t a) { // Cringe City char *p = (char *)D; int32_t m[4] = { 0xffl,0xff00l,0xff0000l,(int32_t)0xff000000l }; int32_t n[4] = { 0,8,16,24 }; int32_t z=0; while ((c--) > 0) { *(p++) = (uint8_t)((a & m[z])>>n[z]); z=(z+1)&3; } } void copybufbyte(const void *S, void *D, int32_t c) { const char *p = (const char *)S; char *q = (char *)D; while ((c--) > 0) *(q++) = *(p++); } // copybufreverse() is a special case: use the assembly version for GCC on x86 // *and* x86_64, and the C version otherwise. // XXX: we don't honor NOASM in the x86_64 case. #if defined(__GNUC__) && defined(__x86_64__) // NOTE: Almost CODEDUP from x86 GCC assembly version, except that // - %%esi -> %%rsi // - %%edi -> %%rdi // - (dec,inc,sub,add)l suffix removed where necessary void copybufreverse(const void *S, void *D, int32_t c) { __asm__ __volatile__( "shrl $1, %%ecx\n\t" "jnc 0f\n\t" // jnc skipit1 "movb (%%rsi), %%al\n\t" "dec %%rsi\n\t" "movb %%al, (%%rdi)\n\t" "inc %%rdi\n\t" "0:\n\t" // skipit1: "shrl $1, %%ecx\n\t" "jnc 1f\n\t" // jnc skipit2 "movw -1(%%rsi), %%ax\n\t" "sub $2, %%rsi\n\t" "rorw $8, %%ax\n\t" "movw %%ax, (%%rdi)\n\t" "add $2, %%rdi\n\t" "1:\n\t" // skipit2 "testl %%ecx, %%ecx\n\t" "jz 3f\n\t" // jz endloop "2:\n\t" // begloop "movl -3(%%rsi), %%eax\n\t" "sub $4, %%rsi\n\t" "bswapl %%eax\n\t" "movl %%eax, (%%rdi)\n\t" "add $4, %%rdi\n\t" "decl %%ecx\n\t" "jnz 2b\n\t" // jnz begloop "3:" : "+S"(S), "+D"(D), "+c"(c) : : "eax", "memory", "cc" ); } #else void copybufreverse(const void *S, void *D, int32_t c) { const char *p = (const char *)S; char *q = (char *)D; while ((c--) > 0) *(q++) = *(p--); } #endif #endif