raze-gles/polymer/eduke32/build/include/pragmas_x86_msvc.h

//
// Microsoft C inline assembler
// 

//{{{

#ifdef __pragmas_h__
#ifndef __pragmas_x86_h__
#define __pragmas_x86_h__

// Returns a*a, keeping only the low 32 bits of the product
// (two-operand imul). There is no return statement: the result is
// whatever the asm block leaves in EAX.
static __inline int32_t sqr(int32_t a)
{
    _asm {
        mov eax, a
            imul eax, eax
    }
}

// Returns (a * d) / c, where the product is held as a signed 64-bit
// value in EDX:EAX before the signed divide. The quotient is left in EAX
// as the return value (no return statement).
// idiv raises a divide error when c is 0 or when the quotient does not
// fit in 32 bits; nothing here guards against either.
static __inline int32_t scale(int32_t a, int32_t d, int32_t c)
{
    _asm {
        mov eax, a
            imul d
            idiv c
    }
}

// Returns the low 32 bits of ((64-bit signed product a * d) >> c).
// The shift count is taken from CL, so only the low bits of c are used
// by shrd. The result is left in EAX (no return statement).
static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
    _asm {
        mov ecx, c
            mov eax, a
            imul d
            shrd eax, edx, cl
    }
}

// _scaler(x) generates two fixed-shift helpers for the literal shift x:
//   mulscale##x(a, d)        -> low 32 bits of ((a * d) >> x)
//   dmulscale##x(a, d, S, D) -> low 32 bits of ((a * d + S * D) >> x)
// Products and the sum are kept as 64-bit values in EDX:EAX; dmulscale##x
// parks the first product in ESI:EBX while computing the second.
// Results are left in EAX (the generated functions have no return statement).
// PRAGMA_FUNCS is defined outside this file; presumably it expands to one
// _scaler(n) invocation per supported shift count -- TODO confirm.
// Keep comments out of the macro body: a // comment on a continued line
// would swallow the lines that follow it.
#define _scaler(x) \
static __inline int32_t mulscale##x (int32_t a, int32_t d) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm shrd eax, edx, x \
} \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
	_asm mov eax, a \
	_asm imul d \
	_asm mov ebx, eax \
	_asm mov eax, S \
	_asm mov esi, edx \
	_asm imul D \
	_asm add eax, ebx \
	_asm adc edx, esi \
	_asm shrd eax, edx, x \
} \


PRAGMA_FUNCS 
#undef _scaler

// Returns the high 32 bits of the signed 64-bit product a * d,
// i.e. (a * d) >> 32. The high half (EDX) is moved into EAX to become
// the return value (no return statement).
static __inline int32_t mulscale32(int32_t a, int32_t d)
{
    _asm {
        mov eax, a
            imul d
            mov eax, edx
    }
}

// Returns the low 32 bits of ((a * d + S * D) >> c).
// Both products and their sum are 64-bit (EDX:EAX); the first product is
// parked in ESI:EBX while the second is computed, then added with carry.
// The shift count comes from CL. The result is left in EAX (no return
// statement).
static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
{
    _asm {
        mov ecx, c
            mov eax, a
            imul d
            mov ebx, eax
            mov eax, S
            mov esi, edx
            imul D
            add eax, ebx
            adc edx, esi
            shrd eax, edx, cl
    }
}

// Returns the high 32 bits of the 64-bit sum (a * d + S * D),
// i.e. (a * d + S * D) >> 32.
// The first product is parked in ESI:EBX, the second is added to it with
// carry, and the high half (EDX) is copied to EAX as the return value.
static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
    _asm {
        mov eax, a
            imul d
            mov ebx, eax
            mov eax, S
            mov esi, edx
            imul D
            add eax, ebx
            adc edx, esi
            mov eax, edx
    }
}

#ifdef USE_ASM_DIVSCALE
// Returns (a << c) / b using a 64-bit dividend in EDX:EAX:
//   EAX = a << c            (low half)
//   EDX = a >> (-c as CL)   (arithmetic; for c in 1..31 this is a >> (32 - c))
// The quotient from idiv is left in EAX (no return statement).
// idiv raises a divide error when b is 0 or the quotient overflows 32 bits.
// NOTE(review): for c == 0 the negated count is also 0, so EDX ends up
// equal to a rather than its sign extension -- looks like c is expected
// to be in 1..31; confirm against callers.
static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)
{
    _asm {
        mov eax, a
            mov ecx, c
            mov edx, eax
            shl eax, cl
            neg cl
            sar edx, cl
            idiv b
    }
}

// Returns (a << 1) / b with a 64-bit dividend.
// add eax,eax doubles a and shifts its sign bit into the carry flag;
// sbb edx,edx then turns that carry into 0 or -1, giving the high half.
// The quotient from idiv is left in EAX (no return statement).
static __inline int32_t divscale1(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            add eax, eax
            sbb edx, edx
            idiv b
    }
}

// Returns (a << 2) / b with a 64-bit dividend:
// EDX = a >> 30 (arithmetic) is the high half, EAX = a * 4 (via lea) the
// low half. The quotient from idiv is left in EAX (no return statement).
static __inline int32_t divscale2(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            mov edx, eax
            sar edx, 30
            lea eax, [eax*4]
            idiv b
    }
}

// Returns (a << 3) / b with a 64-bit dividend:
// EDX = a >> 29 (arithmetic) is the high half, EAX = a * 8 (via lea) the
// low half. The quotient from idiv is left in EAX (no return statement).
static __inline int32_t divscale3(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            mov edx, eax
            sar edx, 29
            lea eax, [eax*8]
            idiv b
    }
}

// DIVSCALE(x, y) defines divscale##y(a, b), which returns (a << y) / b
// using a 64-bit dividend: EDX = a >> x (arithmetic) and EAX = a << y.
// Every instantiation below passes x = 32 - y, for y = 4 through 31.
// The quotient from idiv is left in EAX (the generated functions have no
// return statement).
#define DIVSCALE(x,y) \
static __inline int32_t divscale##y(int32_t a, int32_t b) \
{ \
	_asm mov eax, a \
	_asm mov edx, eax \
	_asm sar edx, x \
	_asm shl eax, y \
	_asm idiv b \
}

DIVSCALE(28, 4)	DIVSCALE(27, 5)	DIVSCALE(26, 6)	DIVSCALE(25, 7)
DIVSCALE(24, 8)	DIVSCALE(23, 9)	DIVSCALE(22, 10)	DIVSCALE(21, 11)
DIVSCALE(20, 12)	DIVSCALE(19, 13)	DIVSCALE(18, 14)	DIVSCALE(17, 15)
DIVSCALE(16, 16)	DIVSCALE(15, 17)	DIVSCALE(14, 18)	DIVSCALE(13, 19)
DIVSCALE(12, 20)	DIVSCALE(11, 21)	DIVSCALE(10, 22)	DIVSCALE(9, 23)
DIVSCALE(8, 24)	DIVSCALE(7, 25)	DIVSCALE(6, 26)	DIVSCALE(5, 27)
DIVSCALE(4, 28)	DIVSCALE(3, 29)	DIVSCALE(2, 30)	DIVSCALE(1, 31)

// Returns (d << 32) / b: d is loaded into EDX as the high half of the
// dividend and EAX (the low half) is zeroed before the signed divide.
// The quotient is left in EAX (no return statement).
// idiv raises a divide error when b is 0 or the quotient overflows 32 bits.
static __inline int32_t divscale32(int32_t d, int32_t b)
{
    _asm {
        mov edx, d
            xor eax, eax
            idiv b
    }
}
#endif  // defined USE_ASM_DIVSCALE

// Returns the byte stored at address d.
// The byte is loaded into AL, which serves as the char return value
// (no return statement).
static __inline char readpixel(void *d)
{
    _asm {
        mov edx, d
            mov al, byte ptr[edx]
    }
}

// Stores the single byte a at address d.
static __inline void drawpixel(void *d, char a)
{
    _asm {
        mov edx, d
            mov al, a
            mov byte ptr[edx], al
    }
}

// Stores the 16-bit value a at address d (one word write).
static __inline void drawpixels(void *d, int16_t a)
{
    _asm {
        mov edx, d
            mov ax, a
            mov word ptr[edx], ax
    }
}

// Stores the 32-bit value a at address d (one dword write).
static __inline void drawpixelses(void *d, int32_t a)
{
    _asm {
        mov edi, d
            mov ecx, c
            mov eax, a
            rep stosd
    }
}

// Fills c consecutive 32-bit words starting at d with the value a
// (rep stosd). c counts dwords, not bytes.
// The direction flag is not touched here; forward filling relies on it
// being clear on entry.
static __inline void clearbuf(void *d, int32_t c, int32_t a)
{
    _asm {
        mov edi, d
            mov ecx, c
            mov eax, a
            rep stosd
    }
}

// Fills c bytes starting at d from the value a.
// The stores are stosb/stosw/stosd, which write AL, AX and EAX
// respectively, so a uniform fill presumably requires the fill byte to be
// replicated in all four bytes of a -- TODO confirm against callers.
// Counts below 4 take a short path (optional byte, then c/2 words).
// Larger counts align the destination to 2 and then 4 bytes, fill dwords,
// and finish with an optional trailing word and byte.
// The count comparison is unsigned (jae), so a negative c is treated as a
// very large count.
static __inline void clearbufbyte(void *d, int32_t c, int32_t a)
{
    _asm {
        mov edi, d
            mov ecx, c
            mov eax, a
            cmp ecx, 4
            jae longcopy
            // short path: one byte if c is odd, then c/2 words
            test cl, 1
            jz preskip
            stosb
        preskip :
        shr ecx, 1
            rep stosw
            jmp endit
        longcopy :
        // align destination: odd address -> one byte
        test edi, 1
            jz skip1
            stosb
            dec ecx
        skip1 :
        // align destination: bit 1 set -> one word
        test edi, 2
            jz skip2
            stosw
            sub ecx, 2
        skip2 :
              // EBX keeps the remaining byte count for the tail tests
              mov ebx, ecx
              shr ecx, 2
              rep stosd
              test bl, 2
              jz skip3
              stosw
          skip3 :
        test bl, 1
            jz endit
            stosb
        endit :
    }
}

// Copies c consecutive 32-bit words from s to d (rep movsd).
// c counts dwords, not bytes.
// The direction flag is not touched here; forward copying relies on it
// being clear on entry.
static __inline void copybuf(const void *s, void *d, int32_t c)
{
    _asm {
        mov esi, s
            mov edi, d
            mov ecx, c
            rep movsd
    }
}

// Copies c bytes from s to d.
// Counts below 4 take a short path (optional byte, then c/2 words).
// Larger counts first align the destination pointer (EDI) to 2 and then
// 4 bytes, copy dwords, and finish with an optional trailing word and byte.
// Only the destination is aligned; the source address is not tested.
// The count comparison is unsigned (jae), so a negative c is treated as a
// very large count.
static __inline void copybufbyte(const void *s, void *d, int32_t c)
{
    _asm {
        mov esi, s
            mov edi, d
            mov ecx, c
            cmp ecx, 4
            jae longcopy
            // short path: one byte if c is odd, then c/2 words
            test cl, 1
            jz preskip
            movsb
        preskip :
        shr ecx, 1
            rep movsw
            jmp endit
        longcopy :
        // align destination: odd address -> one byte
        test edi, 1
            jz skip1
            movsb
            dec ecx
        skip1 :
        // align destination: bit 1 set -> one word
        test edi, 2
            jz skip2
            movsw
            sub ecx, 2
        skip2 :
              // EBX keeps the remaining byte count for the tail tests
              mov ebx, ecx
              shr ecx, 2
              rep movsd
              test bl, 2
              jz skip3
              movsw
          skip3 :
        test bl, 1
            jz endit
            movsb
        endit :
    }
}

// Copies c bytes with the order reversed: the source is read downwards
// starting at s (s, s-1, s-2, ...) while the destination is written
// upwards starting at d, so d[i] receives s[-i].
// The low two bits of c select an initial single byte and/or a
// byte-swapped word; the remaining c/4 groups are moved as byte-swapped
// dwords (bswap).
static __inline void copybufreverse(const void *s, void *d, int32_t c)
{
    _asm {
        mov esi, s
            mov edi, d
            mov ecx, c
            // bit 0 of c -> carry: copy one byte
            shr ecx, 1
            jnc skipit1
            mov al, byte ptr[esi]
            dec esi
            mov byte ptr[edi], al
            inc edi
        skipit1 :
        // bit 1 of c -> carry: copy two bytes, swapped with ror
        shr ecx, 1
            jnc skipit2
            mov ax, word ptr[esi-1]
            sub esi, 2
            ror ax, 8
            mov word ptr[edi], ax
            add edi, 2
        skipit2:
        // ECX now holds c/4: copy that many byte-reversed dwords
        test ecx, ecx
            jz endloop
        begloop :
        mov eax, dword ptr[esi-3]
            sub esi, 4
            bswap eax
            mov dword ptr[edi], eax
            add edi, 4
            dec ecx
            jnz begloop
        endloop :
    }
}

// Writes a run of 32-bit values to the address passed in a:
// element i receives (d + i * s) >> 16 (arithmetic shift).
// The loop is unrolled to produce two elements per iteration (c/2
// iterations); an odd c is completed by the single store at skipbegcalc.
// NOTE(review): when c == 0 the shr leaves ECX zero and the jz goes to
// skipbegcalc, so one dword is still written -- confirm callers never
// pass a zero count.
static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s)
{
    _asm {
        mov eax, a
            mov ecx, c
            mov edx, d
            mov esi, s
            // EBX keeps the original count for the odd-count test below
            mov ebx, ecx
            shr ecx, 1
            jz skipbegcalc
        begqcalc :
        // EDI = next value, EDX = current value; store both shifted down
        lea edi, [edx+esi]
            sar edx, 16
            mov dword ptr[eax], edx
            lea edx, [edi+esi]
            sar edi, 16
            mov dword ptr[eax+4], edi
            add eax, 8
            dec ecx
            jnz begqcalc
            test ebx, 1
            jz skipbegqcalc2
        skipbegcalc :
        sar edx, 16
            mov dword ptr[eax], edx
        skipbegqcalc2 :
    }
}

// Writes c 16-bit values to the address passed in a; element i is taken
// from (d + i * s) >> 16. Returns immediately when c == 0.
// If the output address has bit 1 set, one leading short is written so
// that the main loop can emit two shorts per 32-bit store; a leftover
// element is written at finishit.
// NOTE(review): in the paired store, EBX is sign-extended by sar before
// the high half of the next value is added, so when the first value of a
// pair is negative the second stored short comes out one lower than
// (next >> 16) -- presumably inputs are expected to be non-negative;
// confirm.
static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s)
{
    _asm {
        mov eax, a
            mov ecx, c
            mov edx, d
            mov esi, s
            test ecx, ecx
            jz endit
            // output not dword-aligned: emit one short first
            test al, 2
            jz skipalignit
            mov ebx, edx
            sar ebx, 16
            mov word ptr[eax], bx
            add edx, esi
            add eax, 2
            dec ecx
            jz endit
        skipalignit :
        sub ecx, 2
            jc finishit
        begqcalc :
        // low half = current >> 16, high half = upper 16 bits of next value
        mov ebx, edx
            add edx, esi
            sar ebx, 16
            mov edi, edx
            and edi, 0ffff0000h
            add edx, esi
            add ebx, edi
            mov dword ptr[eax], ebx
            add eax, 4
            sub ecx, 2
            jnc begqcalc
            // ECX is now -1 or -2; an odd value means one element remains
            test cl, 1
            jz endit
        finishit :
        mov ebx, edx
            sar ebx, 16
            mov word ptr[eax], bx
        endit :
    }
}

// Returns a / b and stores a % b in dmval (a variable defined outside
// this file).
// Despite the int32_t signature the division is unsigned: EDX is zeroed
// and div (not idiv) is used. The quotient stays in EAX as the return
// value (no return statement). div raises a divide error when b is 0.
static __inline int32_t divmod(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            xor edx, edx
            div b
            mov dmval, edx
    }
}

// Returns a % b and stores a / b in dmval (a variable defined outside
// this file).
// Despite the int32_t signature the division is unsigned: EDX is zeroed
// and div (not idiv) is used. The remainder is moved into EAX as the
// return value (no return statement). div raises a divide error when b is 0.
static __inline int32_t moddiv(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            xor edx, edx
            div b
            mov dmval, eax
            mov eax, edx
    }
}

// Returns the absolute value of a: the value is negated only when its
// sign bit is set. The result is left in EAX (no return statement).
// Negating the most negative 32-bit value yields the same value again,
// so that input is returned unchanged.
static __inline int32_t klabs(int32_t a)
{
    _asm {
        mov eax, a
            test eax, eax
            jns skipnegate
            neg eax
        skipnegate :
    }
}

// Returns the sign of b as -1, 0 or 1, without branching:
//   add ebx,ebx   moves the sign bit of b into the carry flag
//   sbb eax,eax   makes EAX -1 for negative b, 0 otherwise
//   cmp / adc     adds 1 to AL when EAX (0) is below the doubled value,
//                 i.e. when b is positive
// The result is left in EAX (no return statement).
static __inline int32_t ksgn(int32_t b)
{
    _asm {
        mov ebx, b
            add ebx, ebx
            sbb eax, eax
            cmp eax, ebx
            adc al, 0
    }
}

// Returns the smaller of a and b, compared as unsigned values, without
// branching: the borrow from (a - b) becomes an all-ones mask in ECX when
// a < b, which keeps the difference so that adding b back yields a;
// otherwise the difference is cleared and the result is b.
// The result is left in EAX (no return statement).
static __inline int32_t umin(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            sub eax, b
            sbb ecx, ecx
            and eax, ecx
            add eax, b
    }
}

// Returns the larger of a and b, compared as unsigned values, without
// branching: the borrow mask from (a - b) is inverted, so the difference
// is kept (and b added back, yielding a) only when a >= b; otherwise the
// result is b.
// The result is left in EAX (no return statement).
static __inline int32_t umax(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            sub eax, b
            sbb ecx, ecx
            xor ecx, 0xffffffff
            and eax, ecx
            add eax, b
    }
}

// Returns the smaller of a and b using a signed comparison (jl).
// The result is left in EAX (no return statement).
static __inline int32_t kmin(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            mov ebx, b
            cmp eax, ebx
            jl skipit
            mov eax, ebx
        skipit :
    }
}

// Returns the larger of a and b using a signed comparison (jg).
// The result is left in EAX (no return statement).
static __inline int32_t kmax(int32_t a, int32_t b)
{
    _asm {
        mov eax, a
            mov ebx, b
            cmp eax, ebx
            jg skipit
            mov eax, ebx
        skipit :
    }
}

// Exchanges the single bytes stored at addresses a and b.
// Both bytes are read (into CL and CH) before either is written back.
static __inline void swapchar(void *a, void *b)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov cl, [eax]
            mov ch, [ebx]
            mov[ebx], cl
            mov[eax], ch
    }
}

// Exchanges the 16-bit values stored at addresses a and b.
// Both words are read (into CX and DX) before either is written back.
static __inline void swapshort(void *a, void *b)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov cx, [eax]
            mov dx, [ebx]
            mov[ebx], cx
            mov[eax], dx
    }
}

// Exchanges the 32-bit values stored at addresses a and b.
// Both dwords are read (into ECX and EDX) before either is written back.
static __inline void swaplong(void *a, void *b)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov ecx, [eax]
            mov edx, [ebx]
            mov[ebx], ecx
            mov[eax], edx
    }
}

// swapfloat is an alias for swaplong: the 32-bit swap is done with plain
// integer moves.
#define swapfloat swaplong

// Exchanges c consecutive 32-bit words between the buffers at a and b.
// c counts dwords, not bytes.
// NOTE(review): the count is only tested after the loop body has run, so
// c == 0 is not handled -- the first dec wraps ECX and the loop keeps
// going. Confirm callers always pass c >= 1.
static __inline void swapbuf4(void *a, void *b, int32_t c)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov ecx, c
        begswap :
        mov esi, [eax]
            mov edi, [ebx]
            mov[ebx], esi
            mov[eax], edi
            add eax, 4
            add ebx, 4
            dec ecx
            jnz short begswap
    }
}

// Exchanges the 8 bytes stored at addresses a and b, handled as two
// 32-bit halves (offsets 0 and 4) with the loads and stores interleaved.
static __inline void swap64bit(void *a, void *b)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov ecx, [eax]
            mov edx, [ebx]
            mov[ebx], ecx
            mov ecx, [eax+4]
            mov[eax], edx
            mov edx, [ebx+4]
            mov[ebx+4], ecx
            mov[eax+4], edx
    }
}

// swapchar2(ptr1, ptr2, xsiz) has the same effect as
//   swapchar(ptr1, ptr2); swapchar(ptr1 + 1, ptr2 + xsiz);
// i.e. a[0] is exchanged with b[0] and a[1] with b[s].
// The two bytes at a are read and written back as a single 16-bit access
// (CX / DX); ESI holds the address b + s.
static __inline void swapchar2(void *a, void *b, int32_t s)
{
    _asm {
        mov eax, a
            mov ebx, b
            mov esi, s
            add esi, ebx
            mov cx, [eax]
            mov dl, [ebx]
            mov[ebx], cl
            mov dh, [esi]
            mov[esi], ch
            mov[eax], dx
    }
}
//}}}

#endif // __pragmas_x86_h__
#endif // __pragmas_h__
pragmas.h cleanup part 1: separate into pragmas.h, pragmas_arm.h, pragmas_ppc.h, pragmas_x86_gcc.h and pragmas_x86_msvc.h. Remove "boundmulscale" as it was unused. git-svn-id: https://svn.eduke32.com/eduke32@4601 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:04:53 +00:00			`//`
			`// Microsoft C inline assembler`
			`//`

			`//{{{`

			`#ifdef __pragmas_h__`
			`#ifndef __pragmas_x86_h__`
			`#define __pragmas_x86_h__`

			`static __inline int32_t sqr(int32_t a)`
			`{`
			`_asm {`
			`mov eax, a`
			`imul eax, eax`
			`}`
			`}`

			`static __inline int32_t scale(int32_t a, int32_t d, int32_t c)`
			`{`
			`_asm {`
			`mov eax, a`
			`imul d`
			`idiv c`
			`}`
			`}`

			`static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)`
			`{`
			`_asm {`
			`mov ecx, c`
			`mov eax, a`
			`imul d`
			`shrd eax, edx, cl`
			`}`
			`}`

Remove a few unused things (tmulscale, etc) from the pragmas.h family of headers git-svn-id: https://svn.eduke32.com/eduke32@4604 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:05:40 +00:00			`#define _scaler(x) \`
pragmas.h cleanup part 1: separate into pragmas.h, pragmas_arm.h, pragmas_ppc.h, pragmas_x86_gcc.h and pragmas_x86_msvc.h. Remove "boundmulscale" as it was unused. git-svn-id: https://svn.eduke32.com/eduke32@4601 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:04:53 +00:00			`static __inline int32_t mulscale##x (int32_t a, int32_t d) \`
			`{ \`
			`_asm mov eax, a \`
			`_asm imul d \`
			`_asm shrd eax, edx, x \`
Remove a few unused things (tmulscale, etc) from the pragmas.h family of headers git-svn-id: https://svn.eduke32.com/eduke32@4604 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:05:40 +00:00			`} \`
			`static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \`
			`{ \`
			`_asm mov eax, a \`
			`_asm imul d \`
			`_asm mov ebx, eax \`
			`_asm mov eax, S \`
			`_asm mov esi, edx \`
			`_asm imul D \`
			`_asm add eax, ebx \`
			`_asm adc edx, esi \`
			`_asm shrd eax, edx, x \`
			`} \`


			`PRAGMA_FUNCS`
			`#undef _scaler`
pragmas.h cleanup part 1: separate into pragmas.h, pragmas_arm.h, pragmas_ppc.h, pragmas_x86_gcc.h and pragmas_x86_msvc.h. Remove "boundmulscale" as it was unused. git-svn-id: https://svn.eduke32.com/eduke32@4601 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:04:53 +00:00
			`static __inline int32_t mulscale32(int32_t a, int32_t d)`
			`{`
			`_asm {`
			`mov eax, a`
			`imul d`
			`mov eax, edx`
			`}`
			`}`

			`static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)`
			`{`
			`_asm {`
			`mov ecx, c`
			`mov eax, a`
			`imul d`
			`mov ebx, eax`
			`mov eax, S`
			`mov esi, edx`
			`imul D`
			`add eax, ebx`
			`adc edx, esi`
			`shrd eax, edx, cl`
			`}`
			`}`

			`static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)`
			`{`
			`_asm {`
			`mov eax, a`
			`imul d`
			`mov ebx, eax`
			`mov eax, S`
			`mov esi, edx`
			`imul D`
			`add eax, ebx`
			`adc edx, esi`
			`mov eax, edx`
			`}`
			`}`

			`#ifdef USE_ASM_DIVSCALE`
			`static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ecx, c`
			`mov edx, eax`
			`shl eax, cl`
			`neg cl`
			`sar edx, cl`
			`idiv b`
			`}`
			`}`

			`static __inline int32_t divscale1(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`add eax, eax`
			`sbb edx, edx`
			`idiv b`
			`}`
			`}`

			`static __inline int32_t divscale2(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov edx, eax`
			`sar edx, 30`
			`lea eax, [eax*4]`
			`idiv b`
			`}`
			`}`

			`static __inline int32_t divscale3(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov edx, eax`
			`sar edx, 29`
			`lea eax, [eax*8]`
			`idiv b`
			`}`
			`}`

			`#define DIVSCALE(x,y) \`
			`static __inline int32_t divscale##y(int32_t a, int32_t b) \`
			`{ \`
			`_asm mov eax, a \`
			`_asm mov edx, eax \`
			`_asm sar edx, x \`
			`_asm shl eax, y \`
			`_asm idiv b \`
			`}`

			`DIVSCALE(28, 4) DIVSCALE(27, 5) DIVSCALE(26, 6) DIVSCALE(25, 7)`
			`DIVSCALE(24, 8) DIVSCALE(23, 9) DIVSCALE(22, 10) DIVSCALE(21, 11)`
			`DIVSCALE(20, 12) DIVSCALE(19, 13) DIVSCALE(18, 14) DIVSCALE(17, 15)`
			`DIVSCALE(16, 16) DIVSCALE(15, 17) DIVSCALE(14, 18) DIVSCALE(13, 19)`
			`DIVSCALE(12, 20) DIVSCALE(11, 21) DIVSCALE(10, 22) DIVSCALE(9, 23)`
			`DIVSCALE(8, 24) DIVSCALE(7, 25) DIVSCALE(6, 26) DIVSCALE(5, 27)`
			`DIVSCALE(4, 28) DIVSCALE(3, 29) DIVSCALE(2, 30) DIVSCALE(1, 31)`

			`static __inline int32_t divscale32(int32_t d, int32_t b)`
			`{`
			`_asm {`
			`mov edx, d`
			`xor eax, eax`
			`idiv b`
			`}`
			`}`
			`#endif // defined USE_ASM_DIVSCALE`

			`static __inline char readpixel(void *d)`
			`{`
			`_asm {`
			`mov edx, d`
			`mov al, byte ptr[edx]`
			`}`
			`}`

			`static __inline void drawpixel(void *d, char a)`
			`{`
			`_asm {`
			`mov edx, d`
			`mov al, a`
			`mov byte ptr[edx], al`
			`}`
			`}`

			`static __inline void drawpixels(void *d, int16_t a)`
			`{`
			`_asm {`
			`mov edx, d`
			`mov ax, a`
			`mov word ptr[edx], ax`
			`}`
			`}`

			`static __inline void drawpixelses(void *d, int32_t a)`
			`{`
			`_asm {`
			`mov edx, d`
			`mov eax, a`
			`mov dword ptr[edx], eax`
			`}`
			`}`

			`static __inline void clearbuf(void *d, int32_t c, int32_t a)`
			`{`
			`_asm {`
			`mov edi, d`
			`mov ecx, c`
			`mov eax, a`
			`rep stosd`
			`}`
			`}`

			`static __inline void clearbufbyte(void *d, int32_t c, int32_t a)`
			`{`
			`_asm {`
			`mov edi, d`
			`mov ecx, c`
			`mov eax, a`
			`cmp ecx, 4`
			`jae longcopy`
			`test cl, 1`
			`jz preskip`
			`stosb`
			`preskip :`
			`shr ecx, 1`
			`rep stosw`
			`jmp endit`
			`longcopy :`
			`test edi, 1`
			`jz skip1`
			`stosb`
			`dec ecx`
			`skip1 :`
			`test edi, 2`
			`jz skip2`
			`stosw`
			`sub ecx, 2`
			`skip2 :`
			`mov ebx, ecx`
			`shr ecx, 2`
			`rep stosd`
			`test bl, 2`
			`jz skip3`
			`stosw`
			`skip3 :`
			`test bl, 1`
			`jz endit`
			`stosb`
			`endit :`
			`}`
			`}`

			`static __inline void copybuf(const void *s, void *d, int32_t c)`
			`{`
			`_asm {`
			`mov esi, s`
			`mov edi, d`
			`mov ecx, c`
			`rep movsd`
			`}`
			`}`

			`static __inline void copybufbyte(const void *s, void *d, int32_t c)`
			`{`
			`_asm {`
			`mov esi, s`
			`mov edi, d`
			`mov ecx, c`
			`cmp ecx, 4`
			`jae longcopy`
			`test cl, 1`
			`jz preskip`
			`movsb`
			`preskip :`
			`shr ecx, 1`
			`rep movsw`
			`jmp endit`
			`longcopy :`
			`test edi, 1`
			`jz skip1`
			`movsb`
			`dec ecx`
			`skip1 :`
			`test edi, 2`
			`jz skip2`
			`movsw`
			`sub ecx, 2`
			`skip2 :`
			`mov ebx, ecx`
			`shr ecx, 2`
			`rep movsd`
			`test bl, 2`
			`jz skip3`
			`movsw`
			`skip3 :`
			`test bl, 1`
			`jz endit`
			`movsb`
			`endit :`
			`}`
			`}`

			`static __inline void copybufreverse(const void *s, void *d, int32_t c)`
			`{`
			`_asm {`
			`mov esi, s`
			`mov edi, d`
			`mov ecx, c`
			`shr ecx, 1`
			`jnc skipit1`
			`mov al, byte ptr[esi]`
			`dec esi`
			`mov byte ptr[edi], al`
			`inc edi`
			`skipit1 :`
			`shr ecx, 1`
			`jnc skipit2`
			`mov ax, word ptr[esi-1]`
			`sub esi, 2`
			`ror ax, 8`
			`mov word ptr[edi], ax`
			`add edi, 2`
			`skipit2:`
			`test ecx, ecx`
			`jz endloop`
			`begloop :`
			`mov eax, dword ptr[esi-3]`
			`sub esi, 4`
			`bswap eax`
			`mov dword ptr[edi], eax`
			`add edi, 4`
			`dec ecx`
			`jnz begloop`
			`endloop :`
			`}`
			`}`

			`static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ecx, c`
			`mov edx, d`
			`mov esi, s`
			`mov ebx, ecx`
			`shr ecx, 1`
			`jz skipbegcalc`
			`begqcalc :`
			`lea edi, [edx+esi]`
			`sar edx, 16`
			`mov dword ptr[eax], edx`
			`lea edx, [edi+esi]`
			`sar edi, 16`
			`mov dword ptr[eax+4], edi`
			`add eax, 8`
			`dec ecx`
			`jnz begqcalc`
			`test ebx, 1`
			`jz skipbegqcalc2`
			`skipbegcalc :`
			`sar edx, 16`
			`mov dword ptr[eax], edx`
			`skipbegqcalc2 :`
			`}`
			`}`

			`static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ecx, c`
			`mov edx, d`
			`mov esi, s`
			`test ecx, ecx`
			`jz endit`
			`test al, 2`
			`jz skipalignit`
			`mov ebx, edx`
			`sar ebx, 16`
			`mov word ptr[eax], bx`
			`add edx, esi`
			`add eax, 2`
			`dec ecx`
			`jz endit`
			`skipalignit :`
			`sub ecx, 2`
			`jc finishit`
			`begqcalc :`
			`mov ebx, edx`
			`add edx, esi`
			`sar ebx, 16`
			`mov edi, edx`
			`and edi, 0ffff0000h`
			`add edx, esi`
			`add ebx, edi`
			`mov dword ptr[eax], ebx`
			`add eax, 4`
			`sub ecx, 2`
			`jnc begqcalc`
			`test cl, 1`
			`jz endit`
			`finishit :`
			`mov ebx, edx`
			`sar ebx, 16`
			`mov word ptr[eax], bx`
			`endit :`
			`}`
			`}`

			`//returns eax/ebx, dmval = eax%edx;`
			`static __inline int32_t divmod(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`xor edx, edx`
			`div b`
			`mov dmval, edx`
			`}`
			`}`

			`//returns eax%ebx, dmval = eax/edx;`
			`static __inline int32_t moddiv(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`xor edx, edx`
			`div b`
			`mov dmval, eax`
			`mov eax, edx`
			`}`
			`}`

			`static __inline int32_t klabs(int32_t a)`
			`{`
			`_asm {`
			`mov eax, a`
			`test eax, eax`
			`jns skipnegate`
			`neg eax`
			`skipnegate :`
			`}`
			`}`

			`static __inline int32_t ksgn(int32_t b)`
			`{`
			`_asm {`
			`mov ebx, b`
			`add ebx, ebx`
			`sbb eax, eax`
			`cmp eax, ebx`
			`adc al, 0`
			`}`
			`}`

			`//eax = (unsigned min)umin(eax,ebx)`
			`static __inline int32_t umin(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`sub eax, b`
			`sbb ecx, ecx`
			`and eax, ecx`
			`add eax, b`
			`}`
			`}`

			`//eax = (unsigned max)umax(eax,ebx)`
			`static __inline int32_t umax(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`sub eax, b`
			`sbb ecx, ecx`
			`xor ecx, 0xffffffff`
			`and eax, ecx`
			`add eax, b`
			`}`
			`}`

			`static __inline int32_t kmin(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`cmp eax, ebx`
			`jl skipit`
			`mov eax, ebx`
			`skipit :`
			`}`
			`}`

			`static __inline int32_t kmax(int32_t a, int32_t b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`cmp eax, ebx`
			`jg skipit`
			`mov eax, ebx`
			`skipit :`
			`}`
			`}`

			`static __inline void swapchar(void *a, void *b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov cl, [eax]`
			`mov ch, [ebx]`
			`mov[ebx], cl`
			`mov[eax], ch`
			`}`
			`}`

			`static __inline void swapshort(void *a, void *b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov cx, [eax]`
			`mov dx, [ebx]`
			`mov[ebx], cx`
			`mov[eax], dx`
			`}`
			`}`

			`static __inline void swaplong(void *a, void *b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov ecx, [eax]`
			`mov edx, [ebx]`
			`mov[ebx], ecx`
			`mov[eax], edx`
			`}`
			`}`

Remove a few unused things (tmulscale, etc) from the pragmas.h family of headers git-svn-id: https://svn.eduke32.com/eduke32@4604 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:05:40 +00:00			`#define swapfloat swaplong`

pragmas.h cleanup part 1: separate into pragmas.h, pragmas_arm.h, pragmas_ppc.h, pragmas_x86_gcc.h and pragmas_x86_msvc.h. Remove "boundmulscale" as it was unused. git-svn-id: https://svn.eduke32.com/eduke32@4601 1a8010ca-5511-0410-912e-c29ae57300e0 2014-09-30 04:04:53 +00:00			`static __inline void swapbuf4(void *a, void *b, int32_t c)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov ecx, c`
			`begswap :`
			`mov esi, [eax]`
			`mov edi, [ebx]`
			`mov[ebx], esi`
			`mov[eax], edi`
			`add eax, 4`
			`add ebx, 4`
			`dec ecx`
			`jnz short begswap`
			`}`
			`}`

			`static __inline void swap64bit(void *a, void *b)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov ecx, [eax]`
			`mov edx, [ebx]`
			`mov[ebx], ecx`
			`mov ecx, [eax+4]`
			`mov[eax], edx`
			`mov edx, [ebx+4]`
			`mov[ebx+4], ecx`
			`mov[eax+4], edx`
			`}`
			`}`

			`//swapchar2(ptr1,ptr2,xsiz); is the same as:`
			`//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);`
			`static __inline void swapchar2(void *a, void *b, int32_t s)`
			`{`
			`_asm {`
			`mov eax, a`
			`mov ebx, b`
			`mov esi, s`
			`add esi, ebx`
			`mov cx, [eax]`
			`mov dl, [ebx]`
			`mov[ebx], cl`
			`mov dh, [esi]`
			`mov[esi], ch`
			`mov[eax], dx`
			`}`
			`}`
			`//}}}`

			`#endif // __pragmas_x86_h__`
			`#endif // __pragmas_h__`