raze-gles/polymer/eduke32/build/include/pragmas_x86_msvc.h
2014-09-30 04:05:40 +00:00

604 lines
12 KiB
C

//
// Microsoft C inline assembler
//
//{{{
#ifdef __pragmas_h__
#ifndef __pragmas_x86_h__
#define __pragmas_x86_h__
static __inline int32_t sqr(int32_t a)
{
_asm {
mov eax, a
imul eax, eax
}
}
static __inline int32_t scale(int32_t a, int32_t d, int32_t c)
{
_asm {
mov eax, a
imul d
idiv c
}
}
static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c)
{
_asm {
mov ecx, c
mov eax, a
imul d
shrd eax, edx, cl
}
}
#define _scaler(x) \
static __inline int32_t mulscale##x (int32_t a, int32_t d) \
{ \
_asm mov eax, a \
_asm imul d \
_asm shrd eax, edx, x \
} \
static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \
{ \
_asm mov eax, a \
_asm imul d \
_asm mov ebx, eax \
_asm mov eax, S \
_asm mov esi, edx \
_asm imul D \
_asm add eax, ebx \
_asm adc edx, esi \
_asm shrd eax, edx, x \
} \
PRAGMA_FUNCS
#undef _scaler
static __inline int32_t mulscale32(int32_t a, int32_t d)
{
_asm {
mov eax, a
imul d
mov eax, edx
}
}
static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c)
{
_asm {
mov ecx, c
mov eax, a
imul d
mov ebx, eax
mov eax, S
mov esi, edx
imul D
add eax, ebx
adc edx, esi
shrd eax, edx, cl
}
}
static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D)
{
_asm {
mov eax, a
imul d
mov ebx, eax
mov eax, S
mov esi, edx
imul D
add eax, ebx
adc edx, esi
mov eax, edx
}
}
#ifdef USE_ASM_DIVSCALE
static __inline int32_t divscale(int32_t a, int32_t b, int32_t c)
{
_asm {
mov eax, a
mov ecx, c
mov edx, eax
shl eax, cl
neg cl
sar edx, cl
idiv b
}
}
static __inline int32_t divscale1(int32_t a, int32_t b)
{
_asm {
mov eax, a
add eax, eax
sbb edx, edx
idiv b
}
}
static __inline int32_t divscale2(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov edx, eax
sar edx, 30
lea eax, [eax*4]
idiv b
}
}
static __inline int32_t divscale3(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov edx, eax
sar edx, 29
lea eax, [eax*8]
idiv b
}
}
#define DIVSCALE(x,y) \
static __inline int32_t divscale##y(int32_t a, int32_t b) \
{ \
_asm mov eax, a \
_asm mov edx, eax \
_asm sar edx, x \
_asm shl eax, y \
_asm idiv b \
}
DIVSCALE(28, 4) DIVSCALE(27, 5) DIVSCALE(26, 6) DIVSCALE(25, 7)
DIVSCALE(24, 8) DIVSCALE(23, 9) DIVSCALE(22, 10) DIVSCALE(21, 11)
DIVSCALE(20, 12) DIVSCALE(19, 13) DIVSCALE(18, 14) DIVSCALE(17, 15)
DIVSCALE(16, 16) DIVSCALE(15, 17) DIVSCALE(14, 18) DIVSCALE(13, 19)
DIVSCALE(12, 20) DIVSCALE(11, 21) DIVSCALE(10, 22) DIVSCALE(9, 23)
DIVSCALE(8, 24) DIVSCALE(7, 25) DIVSCALE(6, 26) DIVSCALE(5, 27)
DIVSCALE(4, 28) DIVSCALE(3, 29) DIVSCALE(2, 30) DIVSCALE(1, 31)
static __inline int32_t divscale32(int32_t d, int32_t b)
{
_asm {
mov edx, d
xor eax, eax
idiv b
}
}
#endif // defined USE_ASM_DIVSCALE
static __inline char readpixel(void *d)
{
_asm {
mov edx, d
mov al, byte ptr[edx]
}
}
static __inline void drawpixel(void *d, char a)
{
_asm {
mov edx, d
mov al, a
mov byte ptr[edx], al
}
}
static __inline void drawpixels(void *d, int16_t a)
{
_asm {
mov edx, d
mov ax, a
mov word ptr[edx], ax
}
}
static __inline void drawpixelses(void *d, int32_t a)
{
_asm {
mov edx, d
mov eax, a
mov dword ptr[edx], eax
}
}
static __inline void clearbuf(void *d, int32_t c, int32_t a)
{
_asm {
mov edi, d
mov ecx, c
mov eax, a
rep stosd
}
}
static __inline void clearbufbyte(void *d, int32_t c, int32_t a)
{
_asm {
mov edi, d
mov ecx, c
mov eax, a
cmp ecx, 4
jae longcopy
test cl, 1
jz preskip
stosb
preskip :
shr ecx, 1
rep stosw
jmp endit
longcopy :
test edi, 1
jz skip1
stosb
dec ecx
skip1 :
test edi, 2
jz skip2
stosw
sub ecx, 2
skip2 :
mov ebx, ecx
shr ecx, 2
rep stosd
test bl, 2
jz skip3
stosw
skip3 :
test bl, 1
jz endit
stosb
endit :
}
}
static __inline void copybuf(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
rep movsd
}
}
static __inline void copybufbyte(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
cmp ecx, 4
jae longcopy
test cl, 1
jz preskip
movsb
preskip :
shr ecx, 1
rep movsw
jmp endit
longcopy :
test edi, 1
jz skip1
movsb
dec ecx
skip1 :
test edi, 2
jz skip2
movsw
sub ecx, 2
skip2 :
mov ebx, ecx
shr ecx, 2
rep movsd
test bl, 2
jz skip3
movsw
skip3 :
test bl, 1
jz endit
movsb
endit :
}
}
static __inline void copybufreverse(const void *s, void *d, int32_t c)
{
_asm {
mov esi, s
mov edi, d
mov ecx, c
shr ecx, 1
jnc skipit1
mov al, byte ptr[esi]
dec esi
mov byte ptr[edi], al
inc edi
skipit1 :
shr ecx, 1
jnc skipit2
mov ax, word ptr[esi-1]
sub esi, 2
ror ax, 8
mov word ptr[edi], ax
add edi, 2
skipit2:
test ecx, ecx
jz endloop
begloop :
mov eax, dword ptr[esi-3]
sub esi, 4
bswap eax
mov dword ptr[edi], eax
add edi, 4
dec ecx
jnz begloop
endloop :
}
}
static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s)
{
_asm {
mov eax, a
mov ecx, c
mov edx, d
mov esi, s
mov ebx, ecx
shr ecx, 1
jz skipbegcalc
begqcalc :
lea edi, [edx+esi]
sar edx, 16
mov dword ptr[eax], edx
lea edx, [edi+esi]
sar edi, 16
mov dword ptr[eax+4], edi
add eax, 8
dec ecx
jnz begqcalc
test ebx, 1
jz skipbegqcalc2
skipbegcalc :
sar edx, 16
mov dword ptr[eax], edx
skipbegqcalc2 :
}
}
static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s)
{
_asm {
mov eax, a
mov ecx, c
mov edx, d
mov esi, s
test ecx, ecx
jz endit
test al, 2
jz skipalignit
mov ebx, edx
sar ebx, 16
mov word ptr[eax], bx
add edx, esi
add eax, 2
dec ecx
jz endit
skipalignit :
sub ecx, 2
jc finishit
begqcalc :
mov ebx, edx
add edx, esi
sar ebx, 16
mov edi, edx
and edi, 0ffff0000h
add edx, esi
add ebx, edi
mov dword ptr[eax], ebx
add eax, 4
sub ecx, 2
jnc begqcalc
test cl, 1
jz endit
finishit :
mov ebx, edx
sar ebx, 16
mov word ptr[eax], bx
endit :
}
}
//returns eax/ebx, dmval = eax%edx;
static __inline int32_t divmod(int32_t a, int32_t b)
{
_asm {
mov eax, a
xor edx, edx
div b
mov dmval, edx
}
}
//returns eax%ebx, dmval = eax/edx;
static __inline int32_t moddiv(int32_t a, int32_t b)
{
_asm {
mov eax, a
xor edx, edx
div b
mov dmval, eax
mov eax, edx
}
}
static __inline int32_t klabs(int32_t a)
{
_asm {
mov eax, a
test eax, eax
jns skipnegate
neg eax
skipnegate :
}
}
static __inline int32_t ksgn(int32_t b)
{
_asm {
mov ebx, b
add ebx, ebx
sbb eax, eax
cmp eax, ebx
adc al, 0
}
}
//eax = (unsigned min)umin(eax,ebx)
static __inline int32_t umin(int32_t a, int32_t b)
{
_asm {
mov eax, a
sub eax, b
sbb ecx, ecx
and eax, ecx
add eax, b
}
}
//eax = (unsigned max)umax(eax,ebx)
static __inline int32_t umax(int32_t a, int32_t b)
{
_asm {
mov eax, a
sub eax, b
sbb ecx, ecx
xor ecx, 0xffffffff
and eax, ecx
add eax, b
}
}
static __inline int32_t kmin(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov ebx, b
cmp eax, ebx
jl skipit
mov eax, ebx
skipit :
}
}
static __inline int32_t kmax(int32_t a, int32_t b)
{
_asm {
mov eax, a
mov ebx, b
cmp eax, ebx
jg skipit
mov eax, ebx
skipit :
}
}
static __inline void swapchar(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov cl, [eax]
mov ch, [ebx]
mov[ebx], cl
mov[eax], ch
}
}
static __inline void swapshort(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov cx, [eax]
mov dx, [ebx]
mov[ebx], cx
mov[eax], dx
}
}
static __inline void swaplong(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, [eax]
mov edx, [ebx]
mov[ebx], ecx
mov[eax], edx
}
}
#define swapfloat swaplong
static __inline void swapbuf4(void *a, void *b, int32_t c)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, c
begswap :
mov esi, [eax]
mov edi, [ebx]
mov[ebx], esi
mov[eax], edi
add eax, 4
add ebx, 4
dec ecx
jnz short begswap
}
}
static __inline void swap64bit(void *a, void *b)
{
_asm {
mov eax, a
mov ebx, b
mov ecx, [eax]
mov edx, [ebx]
mov[ebx], ecx
mov ecx, [eax+4]
mov[eax], edx
mov edx, [ebx+4]
mov[ebx+4], ecx
mov[eax+4], edx
}
}
//swapchar2(ptr1,ptr2,xsiz); is the same as:
//swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz);
static __inline void swapchar2(void *a, void *b, int32_t s)
{
_asm {
mov eax, a
mov ebx, b
mov esi, s
add esi, ebx
mov cx, [eax]
mov dl, [ebx]
mov[ebx], cl
mov dh, [esi]
mov[esi], ch
mov[eax], dx
}
}
//}}}
#endif // __pragmas_x86_h__
#endif // __pragmas_h__