// // Microsoft C inline assembler // //{{{ #ifdef __pragmas_h__ #ifndef __pragmas_x86_h__ #define __pragmas_x86_h__ static __inline int32_t mulscale(int32_t a, int32_t d, int32_t c) { _asm { mov ecx, c mov eax, a imul d shrd eax, edx, cl } } #define EDUKE32_SCALER_PRAGMA(x) \ static __inline int32_t mulscale##x (int32_t a, int32_t d) \ { \ _asm mov eax, a \ _asm imul d \ _asm shrd eax, edx, x \ } \ static __inline int32_t dmulscale##x (int32_t a, int32_t d, int32_t S, int32_t D) \ { \ _asm mov eax, a \ _asm imul d \ _asm mov ebx, eax \ _asm mov eax, S \ _asm mov esi, edx \ _asm imul D \ _asm add eax, ebx \ _asm adc edx, esi \ _asm shrd eax, edx, x \ } \ EDUKE32_GENERATE_PRAGMAS #undef EDUKE32_SCALER_PRAGMA static __inline int32_t mulscale32(int32_t a, int32_t d) { _asm { mov eax, a imul d mov eax, edx } } static __inline int32_t dmulscale(int32_t a, int32_t d, int32_t S, int32_t D, int32_t c) { _asm { mov ecx, c mov eax, a imul d mov ebx, eax mov eax, S mov esi, edx imul D add eax, ebx adc edx, esi shrd eax, edx, cl } } static __inline int32_t dmulscale32(int32_t a, int32_t d, int32_t S, int32_t D) { _asm { mov eax, a imul d mov ebx, eax mov eax, S mov esi, edx imul D add eax, ebx adc edx, esi mov eax, edx } } static __inline char readpixel(void *d) { _asm { mov edx, d mov al, byte ptr[edx] } } static __inline void drawpixel(void *d, char a) { _asm { mov edx, d mov al, a mov byte ptr[edx], al } } static __inline void clearbuf(void *d, int32_t c, int32_t a) { _asm { mov edi, d mov ecx, c mov eax, a rep stosd } } static __inline void clearbufbyte(void *d, int32_t c, int32_t a) { _asm { mov edi, d mov ecx, c mov eax, a cmp ecx, 4 jae longcopy test cl, 1 jz preskip stosb preskip : shr ecx, 1 rep stosw jmp endit longcopy : test edi, 1 jz skip1 stosb dec ecx skip1 : test edi, 2 jz skip2 stosw sub ecx, 2 skip2 : mov ebx, ecx shr ecx, 2 rep stosd test bl, 2 jz skip3 stosw skip3 : test bl, 1 jz endit stosb endit : } } static __inline void copybuf(const void *s, void *d, int32_t c) { _asm { mov esi, s mov edi, d mov ecx, c rep movsd } } static __inline void copybufbyte(const void *s, void *d, int32_t c) { _asm { mov esi, s mov edi, d mov ecx, c cmp ecx, 4 jae longcopy test cl, 1 jz preskip movsb preskip : shr ecx, 1 rep movsw jmp endit longcopy : test edi, 1 jz skip1 movsb dec ecx skip1 : test edi, 2 jz skip2 movsw sub ecx, 2 skip2 : mov ebx, ecx shr ecx, 2 rep movsd test bl, 2 jz skip3 movsw skip3 : test bl, 1 jz endit movsb endit : } } static __inline void copybufreverse(const void *s, void *d, int32_t c) { _asm { mov esi, s mov edi, d mov ecx, c shr ecx, 1 jnc skipit1 mov al, byte ptr[esi] dec esi mov byte ptr[edi], al inc edi skipit1 : shr ecx, 1 jnc skipit2 mov ax, word ptr[esi-1] sub esi, 2 ror ax, 8 mov word ptr[edi], ax add edi, 2 skipit2: test ecx, ecx jz endloop begloop : mov eax, dword ptr[esi-3] sub esi, 4 bswap eax mov dword ptr[edi], eax add edi, 4 dec ecx jnz begloop endloop : } } static __inline void qinterpolatedown16(int32_t a, int32_t c, int32_t d, int32_t s) { _asm { mov eax, a mov ecx, c mov edx, d mov esi, s mov ebx, ecx shr ecx, 1 jz skipbegcalc begqcalc : lea edi, [edx+esi] sar edx, 16 mov dword ptr[eax], edx lea edx, [edi+esi] sar edi, 16 mov dword ptr[eax+4], edi add eax, 8 dec ecx jnz begqcalc test ebx, 1 jz skipbegqcalc2 skipbegcalc : sar edx, 16 mov dword ptr[eax], edx skipbegqcalc2 : } } static __inline void qinterpolatedown16short(int32_t a, int32_t c, int32_t d, int32_t s) { _asm { mov eax, a mov ecx, c mov edx, d mov esi, s test ecx, ecx jz endit test al, 2 jz skipalignit mov ebx, edx sar ebx, 16 mov word ptr[eax], bx add edx, esi add eax, 2 dec ecx jz endit skipalignit : sub ecx, 2 jc finishit begqcalc : mov ebx, edx add edx, esi sar ebx, 16 mov edi, edx and edi, 0ffff0000h add edx, esi add ebx, edi mov dword ptr[eax], ebx add eax, 4 sub ecx, 2 jnc begqcalc test cl, 1 jz endit finishit : mov ebx, edx sar ebx, 16 mov word ptr[eax], bx endit : } } static __inline int32_t klabs(int32_t a) { _asm { mov eax, a test eax, eax jns skipnegate neg eax skipnegate : } } static __inline int32_t ksgn(int32_t b) { _asm { mov ebx, b add ebx, ebx sbb eax, eax cmp eax, ebx adc al, 0 } } static __inline void swapchar(void *a, void *b) { _asm { mov eax, a mov ebx, b mov cl, [eax] mov ch, [ebx] mov[ebx], cl mov[eax], ch } } static __inline void swapshort(void *a, void *b) { _asm { mov eax, a mov ebx, b mov cx, [eax] mov dx, [ebx] mov[ebx], cx mov[eax], dx } } static __inline void swaplong(void *a, void *b) { _asm { mov eax, a mov ebx, b mov ecx, [eax] mov edx, [ebx] mov[ebx], ecx mov[eax], edx } } #define swapfloat swaplong static __inline void swapbuf4(void *a, void *b, int32_t c) { _asm { mov eax, a mov ebx, b mov ecx, c begswap : mov esi, [eax] mov edi, [ebx] mov[ebx], esi mov[eax], edi add eax, 4 add ebx, 4 dec ecx jnz short begswap } } static __inline void swap64bit(void *a, void *b) { _asm { mov eax, a mov ebx, b mov ecx, [eax] mov edx, [ebx] mov[ebx], ecx mov ecx, [eax+4] mov[eax], edx mov edx, [ebx+4] mov[ebx+4], ecx mov[eax+4], edx } } //swapchar2(ptr1,ptr2,xsiz); is the same as: //swapchar(ptr1,ptr2); swapchar(ptr1+1,ptr2+xsiz); static __inline void swapchar2(void *a, void *b, int32_t s) { _asm { mov eax, a mov ebx, b mov esi, s add esi, ebx mov cx, [eax] mov dl, [ebx] mov[ebx], cl mov dh, [esi] mov[esi], ch mov[eax], dx } } //0x007ff000 is (11<<13), 0x3f800000 is (127<<23) static inline int32_t krecipasm(int32_t a) { _asm { push ebx mov eax, a mov fpuasm, eax fild dword ptr fpuasm add eax, eax fstp dword ptr fpuasm sbb ebx, ebx mov eax, fpuasm mov ecx, eax and eax, 0x007ff000 shr eax, 10 sub ecx, 0x3f800000 shr ecx, 23 mov eax, dword ptr reciptable[eax] sar eax, cl xor eax, ebx pop ebx } } //}}} #endif // __pragmas_x86_h__ #endif // __pragmas_h__