= MIN_LEN)
- {
- register unsigned long int delta;
- /* Align destinition to MMREG_SIZE -boundary */
- delta = ((unsigned long int)dest)&(SSE_MMREG_SIZE-1);
- if (delta)
- {
- delta=SSE_MMREG_SIZE-delta;
- n -= delta;
- small_memcpy(dest, src, delta);
- }
- i = n >> 6; /* n/64 */
- n&=63;
- if (((unsigned long)src) & 15)
- /* if SRC is misaligned */
- for (; i>0; i--)
- {
- __asm__ __volatile__ (
- "prefetchnta 320(%0);"
- "prefetchnta 352(%0);"
- "movups (%0), %%xmm0;"
- "movups 16(%0), %%xmm1;"
- "movups 32(%0), %%xmm2;"
- "movups 48(%0), %%xmm3;"
- "movntps %%xmm0, (%1);"
- "movntps %%xmm1, 16(%1);"
- "movntps %%xmm2, 32(%1);"
- "movntps %%xmm3, 48(%1);"
- :: "r" (src), "r" (dest) : "memory");
- src = (const unsigned char *)src + 64;
- dest = (unsigned char *)dest + 64;
- }
- else
- /*
- Only if SRC is aligned on 16-byte boundary.
- It allows to use movaps instead of movups, which required data
- to be aligned or a general-protection exception (#GP) is generated.
- */
- for (; i>0; i--)
- {
- __asm__ __volatile__ (
- "prefetchnta 320(%0);"
- "prefetchnta 352(%0);"
- "movaps (%0), %%xmm0;"
- "movaps 16(%0), %%xmm1;"
- "movaps 32(%0), %%xmm2;"
- "movaps 48(%0), %%xmm3;"
- "movntps %%xmm0, (%1);"
- "movntps %%xmm1, 16(%1);"
- "movntps %%xmm2, 32(%1);"
- "movntps %%xmm3, 48(%1);"
- :: "r" (src), "r" (dest) : "memory");
- src = ((const unsigned char *)src) + 64;
- dest = ((unsigned char *)dest) + 64;
- }
- /* since movntq is weakly-ordered, a "sfence"
- * is needed to become ordered again. */
- __asm__ __volatile__ ("sfence":::"memory");
- /* enables to use FPU */
- __asm__ __volatile__ ("emms":::"memory");
- }
- /*
- * Now do the tail of the block
- */
- if (n) __memcpy(dest, src, n);
- return retval;
-}
-
-static FUNCTARGET("mmx") void *mmx2_cpy(void *dest, const void *src, size_t n)
-{
- void *retval = dest;
- size_t i;
-
- /* PREFETCH has effect even for MOVSB instruction ;) */
- __asm__ __volatile__ (
- "prefetchnta (%0);"
- "prefetchnta 32(%0);"
- "prefetchnta 64(%0);"
- "prefetchnta 96(%0);"
- "prefetchnta 128(%0);"
- "prefetchnta 160(%0);"
- "prefetchnta 192(%0);"
- "prefetchnta 224(%0);"
- "prefetchnta 256(%0);"
- "prefetchnta 288(%0);"
- : : "r" (src));
-
- if (n >= MIN_LEN)
- {
- register unsigned long int delta;
- /* Align destinition to MMREG_SIZE -boundary */
- delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
- if (delta)
- {
- delta=MMX_MMREG_SIZE-delta;
- n -= delta;
- small_memcpy(dest, src, delta);
- }
- i = n >> 6; /* n/64 */
- n&=63;
- for (; i>0; i--)
- {
- __asm__ __volatile__ (
- "prefetchnta 320(%0);"
- "prefetchnta 352(%0);"
- "movq (%0), %%mm0;"
- "movq 8(%0), %%mm1;"
- "movq 16(%0), %%mm2;"
- "movq 24(%0), %%mm3;"
- "movq 32(%0), %%mm4;"
- "movq 40(%0), %%mm5;"
- "movq 48(%0), %%mm6;"
- "movq 56(%0), %%mm7;"
- "movntq %%mm0, (%1);"
- "movntq %%mm1, 8(%1);"
- "movntq %%mm2, 16(%1);"
- "movntq %%mm3, 24(%1);"
- "movntq %%mm4, 32(%1);"
- "movntq %%mm5, 40(%1);"
- "movntq %%mm6, 48(%1);"
- "movntq %%mm7, 56(%1);"
- :: "r" (src), "r" (dest) : "memory");
- src = ((const unsigned char *)src) + 64;
- dest = ((unsigned char *)dest) + 64;
- }
- /* since movntq is weakly-ordered, a "sfence"
- * is needed to become ordered again. */
- __asm__ __volatile__ ("sfence":::"memory");
- __asm__ __volatile__ ("emms":::"memory");
- }
- /*
- * Now do the tail of the block
- */
- if (n) __memcpy(dest, src, n);
- return retval;
-}
-
-static FUNCTARGET("mmx") void *mmx1_cpy(void *dest, const void *src, size_t n) //3DNOW
-{
- void *retval = dest;
- size_t i;
-
- /* PREFETCH has effect even for MOVSB instruction ;) */
- __asm__ __volatile__ (
- "prefetch (%0);"
- "prefetch 32(%0);"
- "prefetch 64(%0);"
- "prefetch 96(%0);"
- "prefetch 128(%0);"
- "prefetch 160(%0);"
- "prefetch 192(%0);"
- "prefetch 224(%0);"
- "prefetch 256(%0);"
- "prefetch 288(%0);"
- : : "r" (src));
-
- if (n >= MMX1_MIN_LEN)
- {
- register unsigned long int delta;
- /* Align destinition to MMREG_SIZE -boundary */
- delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
- if (delta)
- {
- delta=MMX_MMREG_SIZE-delta;
- n -= delta;
- small_memcpy(dest, src, delta);
- }
- i = n >> 6; /* n/64 */
- n&=63;
- for (; i>0; i--)
- {
- __asm__ __volatile__ (
- "prefetch 320(%0);"
- "prefetch 352(%0);"
- "movq (%0), %%mm0;"
- "movq 8(%0), %%mm1;"
- "movq 16(%0), %%mm2;"
- "movq 24(%0), %%mm3;"
- "movq 32(%0), %%mm4;"
- "movq 40(%0), %%mm5;"
- "movq 48(%0), %%mm6;"
- "movq 56(%0), %%mm7;"
- "movq %%mm0, (%1);"
- "movq %%mm1, 8(%1);"
- "movq %%mm2, 16(%1);"
- "movq %%mm3, 24(%1);"
- "movq %%mm4, 32(%1);"
- "movq %%mm5, 40(%1);"
- "movq %%mm6, 48(%1);"
- "movq %%mm7, 56(%1);"
- :: "r" (src), "r" (dest) : "memory");
- src = ((const unsigned char *)src) + 64;
- dest = ((unsigned char *)dest) + 64;
- }
- __asm__ __volatile__ ("femms":::"memory"); // same as mmx_cpy() but with a femms
- }
- /*
- * Now do the tail of the block
- */
- if (n) __memcpy(dest, src, n);
- return retval;
-}
-#endif
-
-// Alam: why? memcpy may be __cdecl/_System and our code may be not the same type
-static void *cpu_cpy(void *dest, const void *src, size_t n)
-{
- if (src == NULL)
- {
- CONS_Debug(DBG_MEMORY, "Memcpy from 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
- return dest;
- }
-
- if(dest == NULL)
- {
- CONS_Debug(DBG_MEMORY, "Memcpy to 0x0?!: %p %p %s\n", dest, src, sizeu1(n));
- return dest;
- }
-
return memcpy(dest, src, n);
}
-static /*FUNCTARGET("mmx")*/ void *mmx_cpy(void *dest, const void *src, size_t n)
-{
-#if defined (_MSC_VER) && defined (_X86_)
- _asm
- {
- mov ecx, [n]
- mov esi, [src]
- mov edi, [dest]
- shr ecx, 6 // mit mmx: 64bytes per iteration
- jz lower_64 // if lower than 64 bytes
- loop_64: // MMX transfers multiples of 64bytes
- movq mm0, 0[ESI] // read sources
- movq mm1, 8[ESI]
- movq mm2, 16[ESI]
- movq mm3, 24[ESI]
- movq mm4, 32[ESI]
- movq mm5, 40[ESI]
- movq mm6, 48[ESI]
- movq mm7, 56[ESI]
-
- movq 0[EDI], mm0 // write destination
- movq 8[EDI], mm1
- movq 16[EDI], mm2
- movq 24[EDI], mm3
- movq 32[EDI], mm4
- movq 40[EDI], mm5
- movq 48[EDI], mm6
- movq 56[EDI], mm7
-
- add esi, 64
- add edi, 64
- dec ecx
- jnz loop_64
- emms // close mmx operation
- lower_64:// transfer rest of buffer
- mov ebx,esi
- sub ebx,src
- mov ecx,[n]
- sub ecx,ebx
- shr ecx, 3 // multiples of 8 bytes
- jz lower_8
- loop_8:
- movq mm0, [esi] // read source
- movq [edi], mm0 // write destination
- add esi, 8
- add edi, 8
- dec ecx
- jnz loop_8
- emms // close mmx operation
- lower_8:
- mov ebx,esi
- sub ebx,src
- mov ecx,[n]
- sub ecx,ebx
- rep movsb
- mov eax, [dest] // return dest
- }
-#elif defined (__GNUC__) && defined (__i386__)
- void *retval = dest;
- size_t i;
-
- if (n >= MMX1_MIN_LEN)
- {
- register unsigned long int delta;
- /* Align destinition to MMREG_SIZE -boundary */
- delta = ((unsigned long int)dest)&(MMX_MMREG_SIZE-1);
- if (delta)
- {
- delta=MMX_MMREG_SIZE-delta;
- n -= delta;
- small_memcpy(dest, src, delta);
- }
- i = n >> 6; /* n/64 */
- n&=63;
- for (; i>0; i--)
- {
- __asm__ __volatile__ (
- "movq (%0), %%mm0;"
- "movq 8(%0), %%mm1;"
- "movq 16(%0), %%mm2;"
- "movq 24(%0), %%mm3;"
- "movq 32(%0), %%mm4;"
- "movq 40(%0), %%mm5;"
- "movq 48(%0), %%mm6;"
- "movq 56(%0), %%mm7;"
- "movq %%mm0, (%1);"
- "movq %%mm1, 8(%1);"
- "movq %%mm2, 16(%1);"
- "movq %%mm3, 24(%1);"
- "movq %%mm4, 32(%1);"
- "movq %%mm5, 40(%1);"
- "movq %%mm6, 48(%1);"
- "movq %%mm7, 56(%1);"
- :: "r" (src), "r" (dest) : "memory");
- src = ((const unsigned char *)src) + 64;
- dest = ((unsigned char *)dest) + 64;
- }
- __asm__ __volatile__ ("emms":::"memory");
- }
- /*
- * Now do the tail of the block
- */
- if (n) __memcpy(dest, src, n);
- return retval;
-#else
- return cpu_cpy(dest, src, n);
-#endif
-}
-
-void *(*M_Memcpy)(void* dest, const void* src, size_t n) = cpu_cpy;
-
-/** Memcpy that uses MMX, 3DNow, MMXExt or even SSE
- * Do not use on overlapped memory, use memmove for that
- */
-void M_SetupMemcpy(void)
-{
-#if defined (__GNUC__) && defined (__i386__)
- if (R_SSE2)
- M_Memcpy = sse_cpy;
- else if (R_MMXExt)
- M_Memcpy = mmx2_cpy;
- else if (R_3DNow)
- M_Memcpy = mmx1_cpy;
- else
-#endif
- if (R_MMX)
- M_Memcpy = mmx_cpy;
-#if 0
- M_Memcpy = cpu_cpy;
-#endif
-}
-
/** Return the appropriate message for a file error or end of file.
*/
const char *M_FileError(FILE *fp)
diff --git a/src/m_misc.h b/src/m_misc.h
index 1e7befb1..0ac4c365 100644
--- a/src/m_misc.h
+++ b/src/m_misc.h
@@ -98,8 +98,6 @@ TMatrix *RotateZMatrix(angle_t rad);
// s1 = s2+s3+s1 (1024 lenghtmax)
void strcatbf(char *s1, const char *s2, const char *s3);
-void M_SetupMemcpy(void);
-
const char *M_FileError(FILE *handle);
// counting bits, for weapon ammo code, usually
diff --git a/src/p5prof.h b/src/p5prof.h
deleted file mode 100644
index a9ed3965..00000000
--- a/src/p5prof.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/*********************************************************
- *
- * File: p5prof.h
- * By: Kevin Baca
- *
- * MODIFIED BY Fab SO THAT RDMSR(...) WRITES EDX : EAX TO A LONG LONG
- * (WHICH MEANS WRITE THE LOW DWORD FIRST)
- *
- * Now in yer code do:
- * INT64 count,total;
- *
- * ...
- * RDMSR(0x10,&count); //inner loop count
- * total += count;
- * ...
- *
- * printf("0x%x %x", (INT32)total, *((INT32 *)&total+1));
- * // HIGH LOW
- *
- *********************************************************/
-/**\file
- \brief This file provides macros to profile your code.
-
- Here's how they work...
-
- As you may or may not know, the Pentium class of
- processors provides extremely fine grained profiling
- capabilities through the use of what are called
- Machine Specific Registers (MSRs). These registers
- can provide information about almost any aspect of
- CPU performance down to a single cycle.
-
- The MSRs of interest for profiling are specified by
- indices 0x10, 0x11, 0x12, and 0x13. Here is a brief
- description of each of these registers:
-
- MSR 0x10
- This register is simple a cycle counter.
-
- MSR 0x11
- This register controls what type of profiling data
- will be gathered.
-
- MSRs 0x12 and 0x13
- These registers gather the profiling data specified in
- MSR 0x11.
-
- Each MSR is 64 bits wide. For the Pentium processor,
- only the lower 32 bits of MSR 0x11 are valid. Bits 0-15
- specify what data will be gathered in MSR 0x12. Bits 16-31
- specify what data will be gathered in MSR 0x13. Both sets
- of bits have the same format:
-
- Bits 0-5 specify which hardware event will be tracked.
- Bit 6, if set, indicates events will be tracked in
- rings 0-2.
- Bit 7, if set, indicates events will be tracked in
- ring 3.
- Bit 8, if set, indicates cycles should be counted for
- the specified event. If clear, it indicates the
- number of events should be counted.
-
- Two instructions are provided for manupulating the MSRs.
- RDMSR (Read Machine Specific Register) and WRMSR
- (Write Machine Specific Register). These opcodes were
- originally undocumented and therefore most assemblers don't
- recognize them. Their byte codes are provided in the
- macros below.
-
- RDMSR takes the MSR index in ecx and the profiling criteria
- in edx : eax.
-
- WRMSR takes the MSR index in ecx and returns the profile data
- in edx : eax.
-
- Two profiling registers limits profiling capability to
- gathering only two types of information. The register
- usage can, however, be combined in interesting ways.
- For example, you can set one register to gather the
- number of a specific type of event while the other gathers
- the number of cycles for the same event. Or you can
- gather the number of two separate events while using
- MSR 0x10 to gather the number of cycles.
-
- The enumerated list provides somewhat readable labels for
- the types of events that can be tracked.
-
- For more information, get ahold of appendix H from the
- Intel Pentium programmer's manual (I don't remember the
- order number) or go to
- http://green.kaist.ac.kr/jwhahn/art3.htm.
- That's an article by Terje Mathisen where I got most of
- my information.
-
- You may use this code however you wish. I hope it's
- useful and I hope I got everything right.
-
- -Kevin
-
- kbaca@skygames.com
-
-*/
-
-#ifdef __GNUC__
-
-#define RDTSC(_dst) \
-__asm__("
- .byte 0x0F,0x31
- movl %%edx,(%%edi)
- movl %%eax,4(%%edi)"\
-: : "D" (_dst) : "eax", "edx", "edi")
-
-// the old code... swapped it
-// movl %%edx,(%%edi)
-// movl %%eax,4(%%edi)"
-#define RDMSR(_msri, _msrd) \
-__asm__("
- .byte 0x0F,0x32
- movl %%eax,(%%edi)
- movl %%edx,4(%%edi)"\
-: : "c" (_msri), "D" (_msrd) : "eax", "ecx", "edx", "edi")
-
-#define WRMSR(_msri, _msrd) \
-__asm__("
- xorl %%edx,%%edx
- .byte 0x0F,0x30"\
-: : "c" (_msri), "a" (_msrd) : "eax", "ecx", "edx")
-
-#define RDMSR_0x12_0x13(_msr12, _msr13) \
-__asm__("
- movl $0x12,%%ecx
- .byte 0x0F,0x32
- movl %%edx,(%%edi)
- movl %%eax,4(%%edi)
- movl $0x13,%%ecx
- .byte 0x0F,0x32
- movl %%edx,(%%esi)
- movl %%eax,4(%%esi)"\
-: : "D" (_msr12), "S" (_msr13) : "eax", "ecx", "edx", "edi")
-
-#define ZERO_MSR_0x12_0x13() \
-__asm__("
- xorl %%edx,%%edx
- xorl %%eax,%%eax
- movl $0x12,%%ecx
- .byte 0x0F,0x30
- movl $0x13,%%ecx
- .byte 0x0F,0x30"\
-: : : "eax", "ecx", "edx")
-
-#elif defined (__WATCOMC__)
-
-extern void RDTSC(UINT32 *dst);
-#pragma aux RDTSC =\
- "db 0x0F,0x31"\
- "mov [edi],edx"\
- "mov [4+edi],eax"\
- parm [edi]\
- modify [eax edx edi];
-
-extern void RDMSR(UINT32 msri, UINT32 *msrd);
-#pragma aux RDMSR =\
- "db 0x0F,0x32"\
- "mov [edi],edx"\
- "mov [4+edi],eax"\
- parm [ecx] [edi]\
- modify [eax ecx edx edi];
-
-extern void WRMSR(UINT32 msri, UINT32 msrd);
-#pragma aux WRMSR =\
- "xor edx,edx"\
- "db 0x0F,0x30"\
- parm [ecx] [eax]\
- modify [eax ecx edx];
-
-extern void RDMSR_0x12_0x13(UINT32 *msr12, UINT32 *msr13);
-#pragma aux RDMSR_0x12_0x13 =\
- "mov ecx,0x12"\
- "db 0x0F,0x32"\
- "mov [edi],edx"\
- "mov [4+edi],eax"\
- "mov ecx,0x13"\
- "db 0x0F,0x32"\
- "mov [esi],edx"\
- "mov [4+esi],eax"\
- parm [edi] [esi]\
- modify [eax ecx edx edi esi];
-
-extern void ZERO_MSR_0x12_0x13(void);
-#pragma aux ZERO_MSR_0x12_0x13 =\
- "xor edx,edx"\
- "xor eax,eax"\
- "mov ecx,0x12"\
- "db 0x0F,0x30"\
- "mov ecx,0x13"\
- "db 0x0F,0x30"\
- modify [eax ecx edx];
-
-#endif
-
-typedef enum
-{
- DataRead,
- DataWrite,
- DataTLBMiss,
- DataReadMiss,
- DataWriteMiss,
- WriteHitEM,
- DataCacheLinesWritten,
- DataCacheSnoops,
- DataCacheSnoopHit,
- MemAccessBothPipes,
- BankConflict,
- MisalignedDataRef,
- CodeRead,
- CodeTLBMiss,
- CodeCacheMiss,
- SegRegLoad,
- RESERVED0,
- RESERVED1,
- Branch,
- BTBHit,
- TakenBranchOrBTBHit,
- PipelineFlush,
- InstructionsExeced,
- InstructionsExecedVPipe,
- BusUtilizationClocks,
- PipelineStalledWriteBackup,
- PipelineStalledDateMemRead,
- PipeLineStalledWriteEM,
- LockedBusCycle,
- IOReadOrWriteCycle,
- NonCacheableMemRef,
- AGI,
- RESERVED2,
- RESERVED3,
- FPOperation,
- Breakpoint0Match,
- Breakpoint1Match,
- Breakpoint2Match,
- Breakpoint3Match,
- HWInterrupt,
- DataReadOrWrite,
- DataReadOrWriteMiss
-};
-
-#define PROF_CYCLES (0x100)
-#define PROF_EVENTS (0x000)
-#define RING_012 (0x40)
-#define RING_3 (0x80)
-#define RING_0123 (RING_012 | RING_3)
-
-/*void ProfSetProfiles(UINT32 msr12, UINT32 msr13);*/
-#define ProfSetProfiles(_msr12, _msr13)\
-{\
- UINT32 prof;\
-\
- prof = (_msr12) | ((_msr13) << 16);\
- WRMSR(0x11, prof);\
-}
-
-/*void ProfBeginProfiles(void);*/
-#define ProfBeginProfiles()\
- ZERO_MSR_0x12_0x13();
-
-/*void ProfGetProfiles(UINT32 msr12[2], UINT32 msr13[2]);*/
-#define ProfGetProfiles(_msr12, _msr13)\
- RDMSR_0x12_0x13(_msr12, _msr13);
-
-/*void ProfZeroTimer(void);*/
-#define ProfZeroTimer()\
- WRMSR(0x10, 0);
-
-/*void ProfReadTimer(UINT32 timer[2]);*/
-#define ProfReadTimer(timer)\
- RDMSR(0x10, timer);
-
-/*EOF*/
diff --git a/src/r_draw.h b/src/r_draw.h
index a9921028..60d32a94 100644
--- a/src/r_draw.h
+++ b/src/r_draw.h
@@ -138,20 +138,6 @@ void R_DrawColumn_8(void);
void R_DrawShadeColumn_8(void);
void R_DrawTranslucentColumn_8(void);
-#ifdef USEASM
-void ASMCALL R_DrawColumn_8_ASM(void);
-#define R_DrawWallColumn_8_ASM R_DrawColumn_8_ASM
-void ASMCALL R_DrawShadeColumn_8_ASM(void);
-void ASMCALL R_DrawTranslucentColumn_8_ASM(void);
-void ASMCALL R_Draw2sMultiPatchColumn_8_ASM(void);
-
-void ASMCALL R_DrawColumn_8_MMX(void);
-#define R_DrawWallColumn_8_MMX R_DrawColumn_8_MMX
-
-void ASMCALL R_Draw2sMultiPatchColumn_8_MMX(void);
-void ASMCALL R_DrawSpan_8_MMX(void);
-#endif
-
void R_DrawTranslatedColumn_8(void);
void R_DrawTranslatedTranslucentColumn_8(void);
void R_DrawSpan_8(void);
diff --git a/src/r_splats.c b/src/r_splats.c
index 9ab67127..f86f1686 100644
--- a/src/r_splats.c
+++ b/src/r_splats.c
@@ -23,11 +23,6 @@ static wallsplat_t wallsplats[MAXLEVELSPLATS]; // WALL splats
static INT32 freewallsplat;
#endif
-#ifdef USEASM
-/// \brief for floorsplats \note accessed by asm code
-struct rastery_s *prastertab;
-#endif
-
#ifdef FLOORSPLATS
static floorsplat_t floorsplats[1]; // FLOOR splats
static INT32 freefloorsplat;
@@ -339,12 +334,6 @@ void R_AddVisibleFloorSplats(subsector_t *subsec)
}
}
-#ifdef USEASM
-// tv1, tv2 = x/y qui varie dans la texture, tc = x/y qui est constant.
-void ASMCALL rasterize_segment_tex(INT32 x1, INT32 y1, INT32 x2, INT32 y2, INT32 tv1, INT32 tv2,
- INT32 tc, INT32 dir);
-#endif
-
// current test with floor tile
//#define FLOORSPLATSOLIDCOLOR
diff --git a/src/screen.c b/src/screen.c
index fd97e2ca..2d5f9160 100644
--- a/src/screen.c
+++ b/src/screen.c
@@ -33,10 +33,6 @@
// SRB2Kart
#include "r_fps.h" // R_GetFramerateCap
-#if defined (USEASM) && !defined (NORUSEASM)//&& (!defined (_MSC_VER) || (_MSC_VER <= 1200))
-#define RUSEASM //MSC.NET can't patch itself
-#endif
-
// --------------------------------------------
// assembly or c drawer routines for 8bpp/16bpp
// --------------------------------------------
@@ -94,16 +90,6 @@ UINT8 *scr_borderpatch; // flat used to fill the reduced view borders set at ST_
// Short and Tall sky drawer, for the current color mode
void (*walldrawerfunc)(void);
-boolean R_ASM = true;
-boolean R_486 = false;
-boolean R_586 = false;
-boolean R_MMX = false;
-boolean R_SSE = false;
-boolean R_3DNow = false;
-boolean R_MMXExt = false;
-boolean R_SSE2 = false;
-
-
void SCR_SetMode(void)
{
if (dedicated)
@@ -132,28 +118,6 @@ void SCR_SetMode(void)
walldrawerfunc = R_DrawWallColumn_8;
twosmultipatchfunc = R_Draw2sMultiPatchColumn_8;
twosmultipatchtransfunc = R_Draw2sMultiPatchTranslucentColumn_8;
-#ifdef RUSEASM
- if (R_ASM)
- {
- if (R_MMX)
- {
- colfunc = basecolfunc = R_DrawColumn_8_MMX;
- //shadecolfunc = R_DrawShadeColumn_8_ASM;
- //fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
- walldrawerfunc = R_DrawWallColumn_8_MMX;
- twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_MMX;
- spanfunc = basespanfunc = R_DrawSpan_8_MMX;
- }
- else
- {
- colfunc = basecolfunc = R_DrawColumn_8_ASM;
- //shadecolfunc = R_DrawShadeColumn_8_ASM;
- //fuzzcolfunc = R_DrawTranslucentColumn_8_ASM;
- walldrawerfunc = R_DrawWallColumn_8_ASM;
- twosmultipatchfunc = R_Draw2sMultiPatchColumn_8_ASM;
- }
- }
-#endif
}
/* else if (vid.bpp > 1)
{
@@ -181,50 +145,6 @@ void SCR_SetMode(void)
//
void SCR_Startup(void)
{
- const CPUInfoFlags *RCpuInfo = I_CPUInfo();
- if (!M_CheckParm("-NOCPUID") && RCpuInfo)
- {
-#if defined (__i386__) || defined (_M_IX86) || defined (__WATCOMC__)
- R_486 = true;
-#endif
- if (RCpuInfo->RDTSC)
- R_586 = true;
- if (RCpuInfo->MMX)
- R_MMX = true;
- if (RCpuInfo->AMD3DNow)
- R_3DNow = true;
- if (RCpuInfo->MMXExt)
- R_MMXExt = true;
- if (RCpuInfo->SSE)
- R_SSE = true;
- if (RCpuInfo->SSE2)
- R_SSE2 = true;
- CONS_Printf("CPU Info: 486: %i, 586: %i, MMX: %i, 3DNow: %i, MMXExt: %i, SSE2: %i\n", R_486, R_586, R_MMX, R_3DNow, R_MMXExt, R_SSE2);
- }
-
- if (M_CheckParm("-noASM"))
- R_ASM = false;
- if (M_CheckParm("-486"))
- R_486 = true;
- if (M_CheckParm("-586"))
- R_586 = true;
- if (M_CheckParm("-MMX"))
- R_MMX = true;
- if (M_CheckParm("-3DNow"))
- R_3DNow = true;
- if (M_CheckParm("-MMXExt"))
- R_MMXExt = true;
-
- if (M_CheckParm("-SSE"))
- R_SSE = true;
- if (M_CheckParm("-noSSE"))
- R_SSE = false;
-
- if (M_CheckParm("-SSE2"))
- R_SSE2 = true;
-
- M_SetupMemcpy();
-
if (dedicated)
{
V_Init();
diff --git a/src/screen.h b/src/screen.h
index 1403524c..72ebffcf 100644
--- a/src/screen.h
+++ b/src/screen.h
@@ -138,17 +138,6 @@ extern void (*transtransfunc)(void);
extern void (*twosmultipatchfunc)(void);
extern void (*twosmultipatchtransfunc)(void);
-// -----
-// CPUID
-// -----
-extern boolean R_ASM;
-extern boolean R_486;
-extern boolean R_586;
-extern boolean R_MMX;
-extern boolean R_3DNow;
-extern boolean R_MMXExt;
-extern boolean R_SSE2;
-
// ----------------
// screen variables
// ----------------
diff --git a/src/sdl/MakeCYG.cfg b/src/sdl/MakeCYG.cfg
index 5907579c..b78316b0 100644
--- a/src/sdl/MakeCYG.cfg
+++ b/src/sdl/MakeCYG.cfg
@@ -7,7 +7,6 @@
NOHW=1
NOHS=1
- NOASM=1
OPTS+=-DLINUX
diff --git a/src/sdl/MakeNIX.cfg b/src/sdl/MakeNIX.cfg
index 86c8521c..d95c6660 100644
--- a/src/sdl/MakeNIX.cfg
+++ b/src/sdl/MakeNIX.cfg
@@ -39,7 +39,6 @@ endif
#
ifdef SOLARIS
NOIPX=1
- NOASM=1
OPTS+=-DSOLARIS -DINADDR_NONE=INADDR_ANY -DBSD_COMP
OPTS+=-I/usr/local/include -I/opt/sfw/include
LDFLAGS+=-L/opt/sfw/lib
diff --git a/src/sdl/Makefile.cfg b/src/sdl/Makefile.cfg
index 1744d691..73f8710b 100644
--- a/src/sdl/Makefile.cfg
+++ b/src/sdl/Makefile.cfg
@@ -37,14 +37,6 @@ else
endif
endif
-
- #use the x86 asm code
-ifndef CYGWIN32
-ifndef NOASM
- USEASM=1
-endif
-endif
-
OBJS+=$(OBJDIR)/i_video.o $(OBJDIR)/dosstr.o $(OBJDIR)/endtxt.o $(OBJDIR)/hwsym_sdl.o
OPTS+=-DDIRECTFULLSCREEN -DHAVE_SDL
diff --git a/src/sdl/i_main.c b/src/sdl/i_main.c
index 4ac45aa1..a7891003 100644
--- a/src/sdl/i_main.c
+++ b/src/sdl/i_main.c
@@ -70,40 +70,6 @@ char logfilename[1024];
typedef BOOL (WINAPI *p_IsDebuggerPresent)(VOID);
#endif
-#if defined (_WIN32)
-static inline VOID MakeCodeWritable(VOID)
-{
-#ifdef USEASM // Disable write-protection of code segment
- DWORD OldRights;
- const DWORD NewRights = PAGE_EXECUTE_READWRITE;
- PBYTE pBaseOfImage = (PBYTE)GetModuleHandle(NULL);
- PIMAGE_DOS_HEADER dosH =(PIMAGE_DOS_HEADER)pBaseOfImage;
- PIMAGE_NT_HEADERS ntH = (PIMAGE_NT_HEADERS)(pBaseOfImage + dosH->e_lfanew);
- PIMAGE_OPTIONAL_HEADER oH = (PIMAGE_OPTIONAL_HEADER)
- ((PBYTE)ntH + sizeof (IMAGE_NT_SIGNATURE) + sizeof (IMAGE_FILE_HEADER));
- LPVOID pA = pBaseOfImage+oH->BaseOfCode;
- SIZE_T pS = oH->SizeOfCode;
-#if 1 // try to find the text section
- PIMAGE_SECTION_HEADER ntS = IMAGE_FIRST_SECTION (ntH);
- WORD s;
- for (s = 0; s < ntH->FileHeader.NumberOfSections; s++)
- {
- if (memcmp (ntS[s].Name, ".text\0\0", 8) == 0)
- {
- pA = pBaseOfImage+ntS[s].VirtualAddress;
- pS = ntS[s].Misc.VirtualSize;
- break;
- }
- }
-#endif
-
- if (!VirtualProtect(pA,pS,NewRights,&OldRights))
- I_Error("Could not make code writable\n");
-#endif
-}
-#endif
-
-
#ifdef _WIN32
static void
ChDirToExe (void)
@@ -185,7 +151,6 @@ int main(int argc, char **argv)
#ifndef __MINGW32__
prevExceptionFilter = SetUnhandledExceptionFilter(RecordExceptionInfo);
#endif
- MakeCodeWritable();
#endif
// startup SRB2
diff --git a/src/sdl/i_system.c b/src/sdl/i_system.c
index 9349b91c..cf0f28d2 100644
--- a/src/sdl/i_system.c
+++ b/src/sdl/i_system.c
@@ -3910,69 +3910,6 @@ UINT32 I_GetFreeMem(UINT32 *total)
#endif
}
-const CPUInfoFlags *I_CPUInfo(void)
-{
-#if defined (_WIN32)
- static CPUInfoFlags WIN_CPUInfo;
- SYSTEM_INFO SI;
- p_IsProcessorFeaturePresent pfnCPUID = (p_IsProcessorFeaturePresent)(LPVOID)GetProcAddress(GetModuleHandleA("kernel32.dll"), "IsProcessorFeaturePresent");
-
- ZeroMemory(&WIN_CPUInfo,sizeof (WIN_CPUInfo));
- if (pfnCPUID)
- {
- WIN_CPUInfo.FPPE = pfnCPUID( 0); //PF_FLOATING_POINT_PRECISION_ERRATA
- WIN_CPUInfo.FPE = pfnCPUID( 1); //PF_FLOATING_POINT_EMULATED
- WIN_CPUInfo.cmpxchg = pfnCPUID( 2); //PF_COMPARE_EXCHANGE_DOUBLE
- WIN_CPUInfo.MMX = pfnCPUID( 3); //PF_MMX_INSTRUCTIONS_AVAILABLE
- WIN_CPUInfo.PPCMM64 = pfnCPUID( 4); //PF_PPC_MOVEMEM_64BIT_OK
- WIN_CPUInfo.ALPHAbyte = pfnCPUID( 5); //PF_ALPHA_BYTE_INSTRUCTIONS
- WIN_CPUInfo.SSE = pfnCPUID( 6); //PF_XMMI_INSTRUCTIONS_AVAILABLE
- WIN_CPUInfo.AMD3DNow = pfnCPUID( 7); //PF_3DNOW_INSTRUCTIONS_AVAILABLE
- WIN_CPUInfo.RDTSC = pfnCPUID( 8); //PF_RDTSC_INSTRUCTION_AVAILABLE
- WIN_CPUInfo.PAE = pfnCPUID( 9); //PF_PAE_ENABLED
- WIN_CPUInfo.SSE2 = pfnCPUID(10); //PF_XMMI64_INSTRUCTIONS_AVAILABLE
- //WIN_CPUInfo.blank = pfnCPUID(11); //PF_SSE_DAZ_MODE_AVAILABLE
- WIN_CPUInfo.DEP = pfnCPUID(12); //PF_NX_ENABLED
- WIN_CPUInfo.SSE3 = pfnCPUID(13); //PF_SSE3_INSTRUCTIONS_AVAILABLE
- WIN_CPUInfo.cmpxchg16b = pfnCPUID(14); //PF_COMPARE_EXCHANGE128
- WIN_CPUInfo.cmp8xchg16 = pfnCPUID(15); //PF_COMPARE64_EXCHANGE128
- WIN_CPUInfo.PFC = pfnCPUID(16); //PF_CHANNELS_ENABLED
- }
-#ifdef HAVE_SDLCPUINFO
- else
- {
- WIN_CPUInfo.RDTSC = SDL_HasRDTSC();
- WIN_CPUInfo.MMX = SDL_HasMMX();
- WIN_CPUInfo.AMD3DNow = SDL_Has3DNow();
- WIN_CPUInfo.SSE = SDL_HasSSE();
- WIN_CPUInfo.SSE2 = SDL_HasSSE2();
- WIN_CPUInfo.AltiVec = SDL_HasAltiVec();
- }
- WIN_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
- WIN_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
-#endif
- GetSystemInfo(&SI);
- WIN_CPUInfo.CPUs = SI.dwNumberOfProcessors;
- WIN_CPUInfo.IA64 = (SI.dwProcessorType == 2200); // PROCESSOR_INTEL_IA64
- WIN_CPUInfo.AMD64 = (SI.dwProcessorType == 8664); // PROCESSOR_AMD_X8664
- return &WIN_CPUInfo;
-#elif defined (HAVE_SDLCPUINFO)
- static CPUInfoFlags SDL_CPUInfo;
- memset(&SDL_CPUInfo,0,sizeof (CPUInfoFlags));
- SDL_CPUInfo.RDTSC = SDL_HasRDTSC();
- SDL_CPUInfo.MMX = SDL_HasMMX();
- SDL_CPUInfo.MMXExt = SDL_FALSE; //SDL_HasMMXExt(); No longer in SDL2
- SDL_CPUInfo.AMD3DNow = SDL_Has3DNow();
- SDL_CPUInfo.AMD3DNowExt = SDL_FALSE; //SDL_Has3DNowExt(); No longer in SDL2
- SDL_CPUInfo.SSE = SDL_HasSSE();
- SDL_CPUInfo.SSE2 = SDL_HasSSE2();
- SDL_CPUInfo.AltiVec = SDL_HasAltiVec();
- return &SDL_CPUInfo;
-#else
- return NULL; /// \todo CPUID asm
-#endif
-}
-
// note CPUAFFINITY code used to reside here
void I_RegisterSysCommands(void) {}
#endif
diff --git a/src/tmap.nas b/src/tmap.nas
deleted file mode 100644
index 78840106..00000000
--- a/src/tmap.nas
+++ /dev/null
@@ -1,957 +0,0 @@
-;; SONIC ROBO BLAST 2
-;;-----------------------------------------------------------------------------
-;; Copyright (C) 1998-2000 by DooM Legacy Team.
-;; Copyright (C) 1999-2018 by Sonic Team Junior.
-;;
-;; This program is free software distributed under the
-;; terms of the GNU General Public License, version 2.
-;; See the 'LICENSE' file for more details.
-;;-----------------------------------------------------------------------------
-;; FILE:
-;; tmap.nas
-;; DESCRIPTION:
-;; Assembler optimised rendering code for software mode.
-;; Draw wall columns.
-
-
-[BITS 32]
-
-%define FRACBITS 16
-%define TRANSPARENTPIXEL 247
-
-%ifdef LINUX
-%macro cextern 1
-[extern %1]
-%endmacro
-
-%macro cglobal 1
-[global %1]
-%endmacro
-
-%else
-%macro cextern 1
-%define %1 _%1
-[extern %1]
-%endmacro
-
-%macro cglobal 1
-%define %1 _%1
-[global %1]
-%endmacro
-
-%endif
-
-
-; The viddef_s structure. We only need the width field.
-struc viddef_s
- resb 12
-.width: resb 4
- resb 44
-endstruc
-
-;; externs
-;; columns
-cextern dc_x
-cextern dc_yl
-cextern dc_yh
-cextern ylookup
-cextern columnofs
-cextern dc_source
-cextern dc_texturemid
-cextern dc_texheight
-cextern dc_iscale
-cextern dc_hires
-cextern centery
-cextern centeryfrac
-cextern dc_colormap
-cextern dc_transmap
-cextern colormaps
-cextern vid
-cextern topleft
-
-; DELME
-cextern R_DrawColumn_8
-
-; polygon edge rasterizer
-cextern prastertab
-
-[SECTION .data]
-
-;;.align 4
-loopcount dd 0
-pixelcount dd 0
-tystep dd 0
-
-[SECTION .text]
-
-;;----------------------------------------------------------------------
-;;
-;; R_DrawColumn : 8bpp column drawer
-;;
-;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
-;; Revised by G. Dick July 2010 to support the intervening twelve years'
-;; worth of changes to the renderer. Since I only vaguely know what I'm
-;; doing, this is probably rather suboptimal. Help appreciated!
-;;
-;;----------------------------------------------------------------------
-;; fracstep, vid.width in memory
-;; eax = accumulator
-;; ebx = colormap
-;; ecx = count
-;; edx = heightmask
-;; esi = source
-;; edi = dest
-;; ebp = frac
-;;----------------------------------------------------------------------
-
-cglobal R_DrawColumn_8_ASM
-; align 16
-R_DrawColumn_8_ASM:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov ebp,[dc_yl]
- mov edi,[ylookup+ebp*4]
- mov ebx,[dc_x]
- add edi,[columnofs+ebx*4] ;; edi = dest
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov ecx,[dc_yh]
- add ecx,1
- sub ecx,ebp ;; pixel count
- jle near .done ;; nothing to scale
-;;
-;; fracstep = dc_iscale; // But we just use [dc_iscale]
-;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
-;;
- mov eax,ebp ;; dc_yl
- shl eax,FRACBITS
- sub eax,[centeryfrac]
- imul dword [dc_iscale]
- shrd eax,edx,FRACBITS
- add eax,[dc_texturemid]
- mov ebp,eax ;; ebp = frac
-
- mov ebx,[dc_colormap]
-
- mov esi,[dc_source]
-;;
-;; if (dc_hires) frac = 0;
-;;
- test byte [dc_hires],0x01
- jz .texheightcheck
- xor ebp,ebp
-
-;;
-;; Check for power of two
-;;
-.texheightcheck:
- mov edx,[dc_texheight]
- sub edx,1 ;; edx = heightmask
- test edx,[dc_texheight]
- jnz .notpowertwo
-
- test ecx,0x01 ;; Test for odd no. pixels
- jnz .odd
-
-;;
-;; Texture height is a power of two, so we get modular arithmetic by
-;; masking
-;;
-.powertwo:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part
- and eax,edx ;; eax &= heightmask
- movzx eax,byte [esi + eax] ;; eax = texel
- add ebp,[dc_iscale] ;; frac += fracstep
- movzx eax,byte [ebx+eax] ;; Map through colormap
- mov [edi],al ;; Write pixel
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
-.odd:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part
- and eax,edx ;; eax &= heightmask
- movzx eax,byte [esi + eax] ;; eax = texel
- add ebp,[dc_iscale] ;; frac += fracstep
- movzx eax,byte [ebx+eax] ;; Map through colormap
- mov [edi],al ;; Write pixel
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
-
- sub ecx,2 ;; count -= 2
- jg .powertwo
-
- jmp .done
-
-.notpowertwo:
- add edx,1
- shl edx,FRACBITS
- test ebp,ebp
- jns .notpowtwoloop
-
-.makefracpos:
- add ebp,edx ;; frac is negative; make it positive
- js .makefracpos
-
-.notpowtwoloop:
- cmp ebp,edx ;; Reduce mod height
- jl .writenonpowtwo
- sub ebp,edx
- jmp .notpowtwoloop
-
-.writenonpowtwo:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part.
- mov bl,[esi + eax] ;; ebx = colormap + texel
- add ebp,[dc_iscale] ;; frac += fracstep
- movzx eax,byte [ebx] ;; Map through colormap
- mov [edi],al ;; Write pixel
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
- sub ecx,1
- jnz .notpowtwoloop
-
-;;
-
-.done:
- pop ebx ;; restore register variables
- pop edi
- pop esi
- pop ebp ;; restore caller's stack frame pointer
- ret
-
-
-;;----------------------------------------------------------------------
-;;
-;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
-;; pixels.
-;;
-;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
-;; Revised by G. Dick July 2010 to support the intervening twelve years'
-;; worth of changes to the renderer. Since I only vaguely know what I'm
-;; doing, this is probably rather suboptimal. Help appreciated!
-;;
-;;----------------------------------------------------------------------
-;; fracstep, vid.width in memory
-;; eax = accumulator
-;; ebx = colormap
-;; ecx = count
-;; edx = heightmask
-;; esi = source
-;; edi = dest
-;; ebp = frac
-;;----------------------------------------------------------------------
-
-cglobal R_Draw2sMultiPatchColumn_8_ASM
-; align 16
-R_Draw2sMultiPatchColumn_8_ASM:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov ebp,[dc_yl]
- mov edi,[ylookup+ebp*4]
- mov ebx,[dc_x]
- add edi,[columnofs+ebx*4] ;; edi = dest
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov ecx,[dc_yh]
- add ecx,1
- sub ecx,ebp ;; pixel count
- jle near .done ;; nothing to scale
-;;
-;; fracstep = dc_iscale; // But we just use [dc_iscale]
-;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
-;;
- mov eax,ebp ;; dc_yl
- shl eax,FRACBITS
- sub eax,[centeryfrac]
- imul dword [dc_iscale]
- shrd eax,edx,FRACBITS
- add eax,[dc_texturemid]
- mov ebp,eax ;; ebp = frac
-
- mov ebx,[dc_colormap]
-
- mov esi,[dc_source]
-;;
-;; if (dc_hires) frac = 0;
-;;
- test byte [dc_hires],0x01
- jz .texheightcheck
- xor ebp,ebp
-
-;;
-;; Check for power of two
-;;
-.texheightcheck:
- mov edx,[dc_texheight]
- sub edx,1 ;; edx = heightmask
- test edx,[dc_texheight]
- jnz .notpowertwo
-
- test ecx,0x01 ;; Test for odd no. pixels
- jnz .odd
-
-;;
-;; Texture height is a power of two, so we get modular arithmetic by
-;; masking
-;;
-.powertwo:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part
- and eax,edx ;; eax &= heightmask
- movzx eax,byte [esi + eax] ;; eax = texel
- add ebp,[dc_iscale] ;; frac += fracstep
- cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
- je .nextpowtwoeven ;; If so, advance.
- movzx eax,byte [ebx+eax] ;; Map through colormap
- mov [edi],al ;; Write pixel
-.nextpowtwoeven:
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
-.odd:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part
- and eax,edx ;; eax &= heightmask
- movzx eax,byte [esi + eax] ;; eax = texel
- add ebp,[dc_iscale] ;; frac += fracstep
- cmp al,TRANSPARENTPIXEL ;; Is pixel transparent?
- je .nextpowtwoodd ;; If so, advance.
- movzx eax,byte [ebx+eax] ;; Map through colormap
- mov [edi],al ;; Write pixel
-.nextpowtwoodd:
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
-
- sub ecx,2 ;; count -= 2
- jg .powertwo
-
- jmp .done
-
-.notpowertwo:
- add edx,1
- shl edx,FRACBITS
- test ebp,ebp
- jns .notpowtwoloop
-
-.makefracpos:
- add ebp,edx ;; frac is negative; make it positive
- js .makefracpos
-
-.notpowtwoloop:
- cmp ebp,edx ;; Reduce mod height
- jl .writenonpowtwo
- sub ebp,edx
- jmp .notpowtwoloop
-
-.writenonpowtwo:
- mov eax,ebp ;; eax = frac
- sar eax,FRACBITS ;; Integer part.
- mov bl,[esi + eax] ;; ebx = colormap + texel
- add ebp,[dc_iscale] ;; frac += fracstep
- cmp bl,TRANSPARENTPIXEL ;; Is pixel transparent?
- je .nextnonpowtwo ;; If so, advance.
- movzx eax,byte [ebx] ;; Map through colormap
- mov [edi],al ;; Write pixel
-.nextnonpowtwo:
- ;; dest += vid.width
- add edi,[vid + viddef_s.width]
-
- sub ecx,1
- jnz .notpowtwoloop
-
-;;
-
-.done:
- pop ebx ;; restore register variables
- pop edi
- pop esi
- pop ebp ;; restore caller's stack frame pointer
- ret
-
-;;----------------------------------------------------------------------
-;; R_DrawTranslucentColumnA_8
-;;
-;; Vertical column texture drawer, with transparency. Replaces Doom2's
-;; 'fuzz' effect, which was not so beautiful.
-;; Transparency is always impressive in some way, don't know why...
-;;----------------------------------------------------------------------
-
-cglobal R_DrawTranslucentColumn_8_ASM
-R_DrawTranslucentColumn_8_ASM:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov ebp,[dc_yl]
- mov ebx,ebp
- mov edi,[ylookup+ebx*4]
- mov ebx,[dc_x]
- add edi,[columnofs+ebx*4] ;; edi = dest
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov eax,[dc_yh]
- inc eax
- sub eax,ebp ;; pixel count
- mov [pixelcount],eax ;; save for final pixel
- jle near vtdone ;; nothing to scale
-;;
-;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
-;;
- mov ecx,[dc_iscale] ;; fracstep
- mov eax,[centery]
- sub eax,ebp
- imul eax,ecx
- mov edx,[dc_texturemid]
- sub edx,eax
- mov ebx,edx
-
- shr ebx,16 ;; frac int.
- and ebx,0x7f
- shl edx,16 ;; y frac up
-
- mov ebp,ecx
- shl ebp,16 ;; fracstep f. up
- shr ecx,16 ;; fracstep i. ->cl
- and cl,0x7f
- push cx
- mov ecx,edx
- pop cx
- mov edx,[dc_colormap]
- mov esi,[dc_source]
-;;
-;; lets rock :) !
-;;
- mov eax,[pixelcount]
- shr eax,0x2
- test byte [pixelcount],0x3
- mov ch,al ;; quad count
- mov eax,[dc_transmap]
- je vt4quadloop
-;;
-;; do un-even pixel
-;;
- test byte [pixelcount],0x1
- je trf2
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add ecx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov dl,[edx]
- mov [edi],dl
-pf: add edi,0x12345678
-;;
-;; do two non-quad-aligned pixels
-;;
-trf2: test byte [pixelcount],0x2
- je trf3
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add ecx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov dl,[edx]
- mov [edi],dl
-pg: add edi,0x12345678
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add ecx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov dl,[edx]
- mov [edi],dl
-ph: add edi,0x12345678
-;;
-;; test if there was at least 4 pixels
-;;
-trf3: test ch,0xff ;; test quad count
- je near vtdone
-
-;;
-;; ebp : ystep frac. upper 24 bits
-;; edx : y frac. upper 24 bits
-;; ebx : y i. lower 7 bits, masked for index
-;; ecx : ch = counter, cl = y step i.
-;; eax : colormap aligned 256
-;; esi : source texture column
-;; edi : dest screen
-;;
-vt4quadloop:
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [tystep],ebp
-pi: add edi,0x12345678
- mov al,[edi] ;; fetch dest : index into colormap
-pj: sub edi,0x12345678
- mov ebp,edi
-pk: sub edi,0x12345678
- jmp short inloop
-align 4
-vtquadloop:
- add ecx,[tystep]
- adc bl,cl
-q1: add ebp,0x23456789
- and bl,0x7f
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov dl,[edx]
- mov [edi],dl
- mov al,[ebp] ;; fetch dest : index into colormap
-inloop:
- add ecx,[tystep]
- adc bl,cl
-q2: add edi,0x23456789
- and bl,0x7f
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov dl,[edx]
- mov [ebp+0x0],dl
- mov al,[edi] ;; fetch dest : index into colormap
-
- add ecx,[tystep]
- adc bl,cl
-q3: add ebp,0x23456789
- and bl,0x7f
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov dl,[edx]
- mov [edi],dl
- mov al,[ebp] ;; fetch dest : index into colormap
-
- add ecx,[tystep]
- adc bl,cl
-q4: add edi,0x23456789
- and bl,0x7f
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov dl,[edx]
- mov [ebp],dl
- mov al,[edi] ;; fetch dest : index into colormap
-
- dec ch
- jne vtquadloop
-vtdone:
- pop ebx
- pop edi
- pop esi
- pop ebp
- ret
-
-;;----------------------------------------------------------------------
-;; R_DrawShadeColumn
-;;
-;; for smoke..etc.. test.
-;;----------------------------------------------------------------------
-cglobal R_DrawShadeColumn_8_ASM
-R_DrawShadeColumn_8_ASM:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov ebp,[dc_yl]
- mov ebx,ebp
- mov edi,[ylookup+ebx*4]
- mov ebx,[dc_x]
- add edi,[columnofs+ebx*4] ;; edi = dest
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov eax,[dc_yh]
- inc eax
- sub eax,ebp ;; pixel count
- mov [pixelcount],eax ;; save for final pixel
- jle near shdone ;; nothing to scale
-;;
-;; frac = dc_texturemid - (centery-dc_yl)*fracstep;
-;;
- mov ecx,[dc_iscale] ;; fracstep
- mov eax,[centery]
- sub eax,ebp
- imul eax,ecx
- mov edx,[dc_texturemid]
- sub edx,eax
- mov ebx,edx
- shr ebx,16 ;; frac int.
- and ebx,byte +0x7f
- shl edx,16 ;; y frac up
-
- mov ebp,ecx
- shl ebp,16 ;; fracstep f. up
- shr ecx,16 ;; fracstep i. ->cl
- and cl,0x7f
-
- mov esi,[dc_source]
-;;
-;; lets rock :) !
-;;
- mov eax,[pixelcount]
- mov dh,al
- shr eax,2
- mov ch,al ;; quad count
- mov eax,[colormaps]
- test dh,3
- je sh4quadloop
-;;
-;; do un-even pixel
-;;
- test dh,0x1
- je shf2
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add edx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov [edi],dl
-pl: add edi,0x12345678
-;;
-;; do two non-quad-aligned pixels
-;;
-shf2:
- test dh,0x2
- je shf3
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add edx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov [edi],dl
-pm: add edi,0x12345678
-
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- add edx,ebp
- adc bl,cl
- mov al,[edi] ;; fetch dest : index into colormap
- and bl,0x7f
- mov dl,[eax]
- mov [edi],dl
-pn: add edi,0x12345678
-;;
-;; test if there was at least 4 pixels
-;;
-shf3:
- test ch,0xff ;; test quad count
- je near shdone
-
-;;
-;; ebp : ystep frac. upper 24 bits
-;; edx : y frac. upper 24 bits
-;; ebx : y i. lower 7 bits, masked for index
-;; ecx : ch = counter, cl = y step i.
-;; eax : colormap aligned 256
-;; esi : source texture column
-;; edi : dest screen
-;;
-sh4quadloop:
- mov dh,0x7f ;; prep mask
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [tystep],ebp
-po: add edi,0x12345678
- mov al,[edi] ;; fetch dest : index into colormap
-pp: sub edi,0x12345678
- mov ebp,edi
-pq: sub edi,0x12345678
- jmp short shinloop
-
-align 4
-shquadloop:
- add edx,[tystep]
- adc bl,cl
- and bl,dh
-q5: add ebp,0x12345678
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [edi],dl
- mov al,[ebp] ;; fetch dest : index into colormap
-shinloop:
- add edx,[tystep]
- adc bl,cl
- and bl,dh
-q6: add edi,0x12345678
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [ebp],dl
- mov al,[edi] ;; fetch dest : index into colormap
-
- add edx,[tystep]
- adc bl,cl
- and bl,dh
-q7: add ebp,0x12345678
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [edi],dl
- mov al,[ebp] ;; fetch dest : index into colormap
-
- add edx,[tystep]
- adc bl,cl
- and bl,dh
-q8: add edi,0x12345678
- mov dl,[eax]
- mov ah,[esi+ebx] ;; fetch texel : colormap number
- mov [ebp],dl
- mov al,[edi] ;; fetch dest : index into colormap
-
- dec ch
- jne shquadloop
-
-shdone:
- pop ebx ;; restore register variables
- pop edi
- pop esi
- pop ebp ;; restore caller's stack frame pointer
- ret
-
-
-;; ========================================================================
-;; Rasterization of the segments of a LINEAR polygne textur of manire.
-;; It is thus a question of interpolating coordinate them at the edges of texture in
-;; the time that the X-coordinates minx/maxx for each line.
-;; the argument ' dir' indicates which edges of texture are Interpol?:
-;; 0: segments associs at edge TOP? and BOTTOM? (constant TY)
-;; 1: segments associs at the LEFT and RIGHT edge (constant TX)
-;; ========================================================================
-;;
-;; void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
-;; ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8
-;;
-;; Pour dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, en effet TY est constant.
-;;
-;; Pour dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, en effet TX est constant.
-;;
-;;
-;; Uses: extern struct rastery *_rastertab;
-;;
-
-MINX EQU 0
-MAXX EQU 4
-TX1 EQU 8
-TY1 EQU 12
-TX2 EQU 16
-TY2 EQU 20
-RASTERY_SIZEOF EQU 24
-
-cglobal rasterize_segment_tex
-rasterize_segment_tex:
- push ebp
- mov ebp,esp
-
- sub esp,byte +0x8 ;; allocate the local variables
-
- push ebx
- push esi
- push edi
- o16 mov ax,es
- push eax
-
-;; #define DX [ebp-4]
-;; #define TD [ebp-8]
-
- mov eax,[ebp+0xc] ;; y1
- mov ebx,[ebp+0x14] ;; y2
- cmp ebx,eax
- je near .L_finished ;; special (y1==y2) segment horizontal, exit!
-
- jg near .L_rasterize_right
-
-;;rasterize_left: ;; one rasterize a segment LEFT of the polygne
-
- mov ecx,eax
- sub ecx,ebx
- inc ecx ;; y1-y2+1
-
- mov eax,RASTERY_SIZEOF
- mul ebx ;; * y2
- mov esi,[prastertab]
- add esi,eax ;; point into rastertab[y2]
-
- mov eax,[ebp+0x8] ;; ARG1
- sub eax,[ebp+0x10] ;; ARG3
- shl eax,0x10 ;; ((x1-x2)<cl
- andb $0x7f,%cl
-
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
- movl C(pixelcount),%eax
- movb %al,%dh
- shrl $2,%eax
- movb %al,%ch // quad count
- movl C(dc_colormap),%eax
- testb $3,%dh
- jz v4quadloop
-
-//
-// do un-even pixel
-//
- testb $1,%dh
- jz 2f
-
- movb (%esi,%ebx),%al // prep un-even loops
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- movb %dl,(%edi) // output pixel
- addl C(vidwidth),%edi
-
-//
-// do two non-quad-aligned pixels
-//
-2:
- testb $2,%dh
- jz 3f
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- movb %dl,(%edi) // output pixel
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- addl C(vidwidth),%edi
- movb %dl,(%edi) // output pixel
-
- addl C(vidwidth),%edi
-
-//
-// test if there was at least 4 pixels
-//
-3:
- testb $0xFF,%ch // test quad count
- jz vdone
-
-//
-// ebp : ystep frac. upper 24 bits
-// edx : y frac. upper 24 bits
-// ebx : y i. lower 7 bits, masked for index
-// ecx : ch = counter, cl = y step i.
-// eax : colormap aligned 256
-// esi : source texture column
-// edi : dest screen
-//
-v4quadloop:
- movb $0x7f,%dh // prep mask
-// .align 4
-vquadloop:
- movb (%esi,%ebx),%al // prep loop
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- movb %dl,(%edi) // output pixel
- andb $0x7f,%bl // mask 0-127 texture index
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p1: movb %dl,0x12345678(%edi)
- andb $0x7f,%bl
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p2: movb %dl,2*0x12345678(%edi)
- andb $0x7f,%bl
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p3: movb %dl,3*0x12345678(%edi)
- andb $0x7f,%bl
-
-p4: addl $4*0x12345678,%edi
-
- decb %ch
- jnz vquadloop
-
-vdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-#ifdef HORIZONTALDRAW
-// --------------------------------------------------------------------------
-// Horizontal Column Drawer Optimisation
-// --------------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-.globl C(R_DrawHColumn_8)
-C(R_DrawHColumn_8):
- pushl %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
-
-//
-// dest = yhlookup[dc_x] + hcolumnofs[dc_yl];
-//
- movl C(dc_x),%ebx
- movl C(yhlookup)(,%ebx,4),%edi
- movl C(dc_yl),%ebp
- movl %ebp,%ebx
- addl C(hcolumnofs)(,%ebx,4),%edi // edi = dest
-
-//
-// pixelcount = yh - yl + 1
-//
- movl C(dc_yh),%eax
- incl %eax
- subl %ebp,%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle vhdone // nothing to scale
-
-//
-// frac = dc_texturemid - (centery-dc_yl)*fracstep;
-//
- movl C(dc_iscale),%ecx // fracstep
- movl C(centery),%eax
- subl %ebp,%eax
- imul %ecx,%eax
- movl C(dc_texturemid),%edx
- subl %eax,%edx
- movl %edx,%ebx
- shrl $16,%ebx // frac int.
- andl $0x0000007f,%ebx
- shll $16,%edx // y frac up
-
- movl %ecx,%ebp
- shll $16,%ebp // fracstep f. up
- shrl $16,%ecx // fracstep i. ->cl
- andb $0x7f,%cl
-
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
-
- movl C(pixelcount),%eax
- movb %al,%dh
- shrl $2,%eax
- movb %al,%ch // quad count
-
- testb %ch, %ch
- jz vhnearlydone
-
- movl C(dc_colormap),%eax
- decl %edi //-----
-
-vhloop:
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- andb $0x7f,%bl
- incl %edi //-----
- movb (%eax),%dh
- movb %dh,(%edi) //-----
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- incl %edi //-----
- adcb %cl,%bl
- movb (%eax),%dl
- andb $0x7f,%bl
- movb %dl,(%edi) //-----
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
-// shll $16,%edx
- andb $0x7f,%bl
- incl %edi //-----
- movb (%eax),%dh
- movb %dh,(%edi) //-----
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- incl %edi //-----
- adcb %cl,%bl
- movb (%eax),%dl
- andb $0x7f,%bl
- movb %dl,(%edi)
-// movl %edx,(%edi)
-// addl $4,%edi
-
- decb %ch
- jnz vhloop
-
-vhnearlydone:
-// movl C(pixelcount)
-
-vhdone:
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-
-// --------------------------------------------------------------------------
-// Rotate a buffer 90 degree in clockwise order after horiz.col. draws
-// --------------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-.globl C(R_RotateBuffer)
-C(R_RotateBuffer):
- pushl %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
-
-
- movl C(dc_source),%esi
- movl C(dc_colormap),%edi
-
-
- movb (%esi),%ah
- addl $200,%esi
- movb (%ebx),%al
- addl $200,%ebx
- bswap %eax
- movb (%esi),%ah
- addl $200,%esi
- movb (%ebx),%al
- addl $200,%ebx
- movl %eax,(%edi)
- addl $4,%edi
-
-
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-#endif
-
-//----------------------------------------------------------------------
-//13-02-98:
-// R_DrawSkyColumn : same as R_DrawColumn but:
-//
-// - wrap around 256 instead of 127.
-// this is needed because we have a higher texture for mouselook,
-// we need at least 200 lines for the sky.
-//
-// NOTE: the sky should never wrap, so it could use a faster method.
-// for the moment, we'll still use a wrapping method...
-//
-// IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
-//
-//----------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-.globl C(R_DrawSkyColumn_8)
-C(R_DrawSkyColumn_8):
- pushl %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
-
-//
-// dest = ylookup[dc_yl] + columnofs[dc_x];
-//
- movl C(dc_yl),%ebp
- movl %ebp,%ebx
- movl C(ylookup)(,%ebx,4),%edi
- movl C(dc_x),%ebx
- addl C(columnofs)(,%ebx,4),%edi // edi = dest
-
-//
-// pixelcount = yh - yl + 1
-//
- movl C(dc_yh),%eax
- incl %eax
- subl %ebp,%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle vskydone // nothing to scale
-
-//
-// frac = dc_texturemid - (centery-dc_yl)*fracstep;
-//
- movl C(dc_iscale),%ecx // fracstep
- movl C(centery),%eax
- subl %ebp,%eax
- imul %ecx,%eax
- movl C(dc_texturemid),%edx
- subl %eax,%edx
- movl %edx,%ebx
- shrl $16,%ebx // frac int.
- andl $0x000000ff,%ebx
- shll $16,%edx // y frac up
-
- movl %ecx,%ebp
- shll $16,%ebp // fracstep f. up
- shrl $16,%ecx // fracstep i. ->cl
-
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
- movl C(pixelcount),%eax
- movb %al,%dh
- shrl $2,%eax
- movb %al,%ch // quad count
- movl C(dc_colormap),%eax
- testb $3,%dh
- jz v4skyquadloop
-
-//
-// do un-even pixel
-//
- testb $1,%dh
- jz 2f
-
- movb (%esi,%ebx),%al // prep un-even loops
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- movb %dl,(%edi) // output pixel
- addl C(vidwidth),%edi
-
-//
-// do two non-quad-aligned pixels
-//
-2:
- testb $2,%dh
- jz 3f
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- movb %dl,(%edi) // output pixel
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- addl C(vidwidth),%edi
- movb %dl,(%edi) // output pixel
-
- addl C(vidwidth),%edi
-
-//
-// test if there was at least 4 pixels
-//
-3:
- testb $0xFF,%ch // test quad count
- jz vskydone
-
-//
-// ebp : ystep frac. upper 24 bits
-// edx : y frac. upper 24 bits
-// ebx : y i. lower 7 bits, masked for index
-// ecx : ch = counter, cl = y step i.
-// eax : colormap aligned 256
-// esi : source texture column
-// edi : dest screen
-//
-v4skyquadloop:
-// .align 4
-vskyquadloop:
- movb (%esi,%ebx),%al // prep loop
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- movb %dl,(%edi) // output pixel
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p1b: movb %dl,0x12345678(%edi)
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p2b: movb %dl,2*0x12345678(%edi)
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-p3b: movb %dl,3*0x12345678(%edi)
-
-p4b: addl $4*0x12345678,%edi
-
- decb %ch
- jnz vskyquadloop
-
-vskydone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-
-
-//----------------------------------------------------------------------
-//
-// R_DrawSpan
-//
-// Horizontal texture mapping
-//
-//----------------------------------------------------------------------
-
- .data
-
-ystep: .long 0
-xstep: .long 0
-C(texwidth): .long 64 // texture width
-#if !defined( LINUX) && !defined( __OS2__)
- .text
-#endif
-#ifdef LINUX
- .align 2
-#else
- .align 4
-#endif
-.globl C(R_DrawSpan_8)
-C(R_DrawSpan_8):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-
-//
-// find loop count
-//
- movl C(ds_x2),%eax
- incl %eax
- subl C(ds_x1),%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- js hdone // nothing to scale
- shrl $1,%eax // double pixel count
- movl %eax,C(loopcount)
-
-//
-// build composite position
-//
- movl C(ds_xfrac),%ebp
- shll $10,%ebp
- andl $0x0ffff0000,%ebp
- movl C(ds_yfrac),%eax
- shrl $6,%eax
- andl $0x0ffff,%eax
- movl C(ds_y),%edi
- orl %eax,%ebp
-
- movl C(ds_source),%esi
-
-//
-// calculate screen dest
-//
-
- movl C(ylookup)(,%edi,4),%edi
- movl C(ds_x1),%eax
- addl C(columnofs)(,%eax,4),%edi
-
-//
-// build composite step
-//
- movl C(ds_xstep),%ebx
- shll $10,%ebx
- andl $0x0ffff0000,%ebx
- movl C(ds_ystep),%eax
- shrl $6,%eax
- andl $0x0ffff,%eax
- orl %eax,%ebx
-
- //movl %eax,OFFSET hpatch1+2 // convice tasm to modify code...
- movl %ebx,hpatch1+2
- //movl %eax,OFFSET hpatch2+2 // convice tasm to modify code...
- movl %ebx,hpatch2+2
- movl %esi,hpatch3+2
- movl %esi,hpatch4+2
-// %eax aligned colormap
-// %ebx aligned colormap
-// %ecx,%edx scratch
-// %esi virtual source
-// %edi moving destination pointer
-// %ebp frac
- movl C(ds_colormap),%eax
-// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
-// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
- movl %ebp,%ecx
- addl %ebx,%ebp // advance frac pointer
- shrw $10,%cx
- roll $6,%ecx
- andl $4095,%ecx // finish calculation for third pixel
-// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
-// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
- movl %ebp,%edx
- shrw $10,%dx
- roll $6,%edx
- addl %ebx,%ebp // advance frac pointer
- andl $4095,%edx // finish calculation for fourth pixel
- movl %eax,%ebx
- movb (%esi,%ecx),%al // get first pixel
- movb (%esi,%edx),%bl // get second pixel
- testl $0x0fffffffe,C(pixelcount)
- movb (%eax),%dl // color translate first pixel
-
-// jnz hdoubleloop // at least two pixels to map
-// jmp hchecklast
-
-// movw $0xf0f0,%dx //see visplanes start
-
- jz hchecklast
- movb (%ebx),%dh // color translate second pixel
- movl C(loopcount),%esi
-// .align 4
-hdoubleloop:
-// shld $22,%ebp,%ecx // begin calculating third pixel (y units)
-// shld $6,%ebp,%ecx // begin calculating third pixel (x units)
- movl %ebp,%ecx
- shrw $10,%cx
- roll $6,%ecx
-hpatch1:
- addl $0x012345678,%ebp // advance frac pointer
- movw %dx,(%edi) // write first pixel
- andl $4095,%ecx // finish calculation for third pixel
-// shld $22,%ebp,%edx // begin calculating fourth pixel (y units)
-// shld $6,%ebp,%edx // begin calculating fourth pixel (x units)
- movl %ebp,%edx
- shrw $10,%dx
- roll $6,%edx
-hpatch3:
- movb 0x012345678(%ecx),%al // get third pixel
-// movb %bl,1(%edi) // write second pixel
- andl $4095,%edx // finish calculation for fourth pixel
-hpatch2:
- addl $0x012345678,%ebp // advance frac pointer
-hpatch4:
- movb 0x012345678(%edx),%bl // get fourth pixel
- movb (%eax),%dl // color translate third pixel
- addl $2,%edi // advance to third pixel destination
- decl %esi // done with loop?
- movb (%ebx),%dh // color translate fourth pixel
- jnz hdoubleloop
-
-// check for final pixel
-hchecklast:
- testl $1,C(pixelcount)
- jz hdone
- movb %dl,(%edi) // write final pixel
-
-hdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-
-//.endif
-
-
-//----------------------------------------------------------------------
-// R_DrawTransColumn
-//
-// Vertical column texture drawer, with transparency. Replaces Doom2's
-// 'fuzz' effect, which was not so beautiful.
-// Transparency is always impressive in some way, don't know why...
-//----------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-
-.globl C(R_DrawTranslucentColumn_8)
-C(R_DrawTranslucentColumn_8):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-//
-// dest = ylookup[dc_yl] + columnofs[dc_x];
-//
- movl C(dc_yl),%ebp
- movl %ebp,%ebx
- movl C(ylookup)(,%ebx,4),%edi
- movl C(dc_x),%ebx
- addl C(columnofs)(,%ebx,4),%edi // edi = dest
-
-//
-// pixelcount = yh - yl + 1
-//
- movl C(dc_yh),%eax
- incl %eax
- subl %ebp,%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle vtdone // nothing to scale
-
-//
-// frac = dc_texturemid - (centery-dc_yl)*fracstep;
-//
- movl C(dc_iscale),%ecx // fracstep
- movl C(centery),%eax
- subl %ebp,%eax
- imul %ecx,%eax
- movl C(dc_texturemid),%edx
- subl %eax,%edx
- movl %edx,%ebx
-
- shrl $16,%ebx // frac int.
- andl $0x0000007f,%ebx
- shll $16,%edx // y frac up
-
- movl %ecx,%ebp
- shll $16,%ebp // fracstep f. up
- shrl $16,%ecx // fracstep i. ->cl
- andb $0x7f,%cl
- pushw %cx
- movl %edx,%ecx
- popw %cx
- movl C(dc_colormap),%edx
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
- movl C(pixelcount),%eax
- shrl $2,%eax
- testb $0x03,C(pixelcount)
- movb %al,%ch // quad count
- movl C(dc_transmap),%eax
- jz vt4quadloop
-//
-// do un-even pixel
-//
- testb $1,C(pixelcount)
- jz 2f
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%ecx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%edx), %dl // use colormap now !
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-//
-// do two non-quad-aligned pixels
-//
-2:
- testb $2,C(pixelcount)
- jz 3f
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%ecx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%edx), %dl // use colormap now !
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%ecx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%edx), %dl // use colormap now !
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-
-//
-// test if there was at least 4 pixels
-//
-3:
- testb $0xFF,%ch // test quad count
- jz vtdone
-
-//
-// tystep : ystep frac. upper 24 bits
-// edx : upper 24 bit : colomap
-// dl : tmp pixel to write
-// ebx : y i. lower 7 bits, masked for index
-// ecx : y frac. upper 16 bits
-// ecx : ch = counter, cl = y step i.
-// eax : transmap aligned 65535 (upper 16 bit)
-// ah : background pixel (from the screen buffer)
-// al : foreground pixel (from the texture)
-// esi : source texture column
-// ebp,edi : dest screen
-//
-vt4quadloop:
- movb (%esi,%ebx),%ah // fetch texel : colormap number
-p5: movb 0x12345678(%edi),%al // fetch dest : index into colormap
-
- movl %ebp,C(tystep)
- movl %edi,%ebp
- subl C(vidwidth),%edi
- jmp inloop
-// .align 4
-vtquadloop:
- addl C(tystep),%ecx
- adcb %cl,%bl
-p6: addl $2*0x12345678,%ebp
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb (%edx), %dl // use colormap now !
- movb %dl,(%edi)
- movb (%ebp),%al // fetch dest : index into colormap
-inloop:
- addl C(tystep),%ecx
- adcb %cl,%bl
-p7: addl $2*0x12345678,%edi
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb (%edx), %dl // use colormap now !
- movb %dl,(%ebp)
- movb (%edi),%al // fetch dest : index into colormap
-
- addl C(tystep),%ecx
- adcb %cl,%bl
-p8: addl $2*0x12345678,%ebp
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb (%edx), %dl // use colormap now !
- movb %dl,(%edi)
- movb (%ebp),%al // fetch dest : index into colormap
-
- addl C(tystep),%ecx
- adcb %cl,%bl
-p9: addl $2*0x12345678,%edi
- andb $0x7f,%bl
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb (%edx), %dl // use colormap now !
- movb %dl,(%ebp)
- movb (%edi),%al // fetch dest : index into colormap
-
- decb %ch
- jnz vtquadloop
-
-vtdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-#endif // ifdef USEASM
-
-
-
-//----------------------------------------------------------------------
-// R_DrawShadeColumn
-//
-// for smoke..etc.. test.
-//----------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-.globl C(R_DrawShadeColumn_8)
-C(R_DrawShadeColumn_8):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-//
-// dest = ylookup[dc_yl] + columnofs[dc_x];
-//
- movl C(dc_yl),%ebp
- movl %ebp,%ebx
- movl C(ylookup)(,%ebx,4),%edi
- movl C(dc_x),%ebx
- addl C(columnofs)(,%ebx,4),%edi // edi = dest
-
-//
-// pixelcount = yh - yl + 1
-//
- movl C(dc_yh),%eax
- incl %eax
- subl %ebp,%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle shdone // nothing to scale
-
-//
-// frac = dc_texturemid - (centery-dc_yl)*fracstep;
-//
- movl C(dc_iscale),%ecx // fracstep
- movl C(centery),%eax
- subl %ebp,%eax
- imul %ecx,%eax
- movl C(dc_texturemid),%edx
- subl %eax,%edx
- movl %edx,%ebx
- shrl $16,%ebx // frac int.
- andl $0x0000007f,%ebx
- shll $16,%edx // y frac up
-
- movl %ecx,%ebp
- shll $16,%ebp // fracstep f. up
- shrl $16,%ecx // fracstep i. ->cl
- andb $0x7f,%cl
-
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
- movl C(pixelcount),%eax
- movb %al,%dh
- shrl $2,%eax
- movb %al,%ch // quad count
- movl C(colormaps),%eax
- testb $0x03,%dh
- jz sh4quadloop
-
-//
-// do un-even pixel
-//
- testb $1,%dh
- jz 2f
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-
-//
-// do two non-quad-aligned pixels
-//
-2:
- testb $2,%dh
- jz 3f
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%edi),%al // fetch dest : index into colormap
- andb $0x7f,%bl
- movb (%eax),%dl
- movb %dl,(%edi)
- addl C(vidwidth),%edi
-
-//
-// test if there was at least 4 pixels
-//
-3:
- testb $0xFF,%ch // test quad count
- jz shdone
-
-//
-// ebp : ystep frac. upper 24 bits
-// edx : y frac. upper 24 bits
-// ebx : y i. lower 7 bits, masked for index
-// ecx : ch = counter, cl = y step i.
-// eax : colormap aligned 256
-// esi : source texture column
-// edi : dest screen
-//
-sh4quadloop:
- movb $0x7f,%dh // prep mask
-
- movb (%esi,%ebx),%ah // fetch texel : colormap number
-sh5: movb 0x12345678(%edi),%al // fetch dest : index into colormap
-
- movl %ebp,C(tystep)
- movl %edi,%ebp
- subl C(vidwidth),%edi
- jmp shinloop
-// .align 4
-shquadloop:
- addl C(tystep),%edx
- adcb %cl,%bl
- andb %dh,%bl
-sh6: addl $2*0x12345678,%ebp
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb %dl,(%edi)
- movb (%ebp),%al // fetch dest : index into colormap
-shinloop:
- addl C(tystep),%edx
- adcb %cl,%bl
- andb %dh,%bl
-sh7: addl $2*0x12345678,%edi
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb %dl,(%ebp)
- movb (%edi),%al // fetch dest : index into colormap
-
- addl C(tystep),%edx
- adcb %cl,%bl
- andb %dh,%bl
-sh8: addl $2*0x12345678,%ebp
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb %dl,(%edi)
- movb (%ebp),%al // fetch dest : index into colormap
-
- addl C(tystep),%edx
- adcb %cl,%bl
- andb %dh,%bl
-sh9: addl $2*0x12345678,%edi
- movb (%eax),%dl
- movb (%esi,%ebx),%ah // fetch texel : colormap number
- movb %dl,(%ebp)
- movb (%edi),%al // fetch dest : index into colormap
-
- decb %ch
- jnz shquadloop
-
-shdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-
-
-//----------------------------------------------------------------------
-//
-// R_DrawWaterColumn : basically it's just a copy of R_DrawColumn,
-// but it uses dc_colormap from dc_yl to dc_yw-1
-// then it uses dc_wcolormap from dc_yw to dc_yh
-//
-// Thus, the 'underwater' part of the walls is remapped to 'water-like'
-// colors.
-//
-//----------------------------------------------------------------------
-
-#ifdef LINUX
- .align 2
-#else
- .align 5
-#endif
-.globl C(R_DrawWaterColumn)
-C(R_DrawWaterColumn):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-//
-// dest = ylookup[dc_yl] + columnofs[dc_x];
-//
- movl C(dc_yl),%ebp
- movl %ebp,%ebx
- movl C(ylookup)(,%ebx,4),%edi
- movl C(dc_x),%ebx
- addl C(columnofs)(,%ebx,4),%edi // edi = dest
-
-//
-// pixelcount = yh - yl + 1
-//
- movl C(dc_yh),%eax
- incl %eax
- subl %ebp,%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle wdone // nothing to scale
-
-//
-// frac = dc_texturemid - (centery-dc_yl)*fracstep;
-//
- movl C(dc_iscale),%ecx // fracstep
- movl C(centery),%eax
- subl %ebp,%eax
- imul %ecx,%eax
- movl C(dc_texturemid),%edx
- subl %eax,%edx
- movl %edx,%ebx
- shrl $16,%ebx // frac int.
- andl $0x0000007f,%ebx
- shll $16,%edx // y frac up
-
- movl %ecx,%ebp
- shll $16,%ebp // fracstep f. up
- shrl $16,%ecx // fracstep i. ->cl
- andb $0x7f,%cl
-
- movl C(dc_source),%esi
-
-//
-// lets rock :) !
-//
- movl C(pixelcount),%eax
- movb %al,%dh
- shrl $2,%eax
- movb %al,%ch // quad count
- movl C(dc_wcolormap),%eax
- testb $3,%dh
- jz w4quadloop
-
-//
-// do un-even pixel
-//
- testb $1,%dh
- jz 2f
-
- movb (%esi,%ebx),%al // prep un-even loops
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- movb %dl,(%edi) // output pixel
- addl C(vidwidth),%edi
-
-//
-// do two non-quad-aligned pixels
-//
-2:
- testb $2,%dh
- jz 3f
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- movb %dl,(%edi) // output pixel
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- andb $0x7f,%bl // mask 0-127 texture index
- addl C(vidwidth),%edi
- movb %dl,(%edi) // output pixel
-
- addl C(vidwidth),%edi
-
-//
-// test if there was at least 4 pixels
-//
-3:
- testb $0xFF,%ch // test quad count
- jz wdone
-
-//
-// ebp : ystep frac. upper 24 bits
-// edx : y frac. upper 24 bits
-// ebx : y i. lower 7 bits, masked for index
-// ecx : ch = counter, cl = y step i.
-// eax : colormap aligned 256
-// esi : source texture column
-// edi : dest screen
-//
-w4quadloop:
- movb $0x7f,%dh // prep mask
-// .align 4
-wquadloop:
- movb (%esi,%ebx),%al // prep loop
- addl %ebp,%edx // ypos f += ystep f
- adcb %cl,%bl // ypos i += ystep i
- movb (%eax),%dl // colormap texel
- movb %dl,(%edi) // output pixel
- andb $0x7f,%bl // mask 0-127 texture index
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-w1: movb %dl,0x12345678(%edi)
- andb $0x7f,%bl
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-w2: movb %dl,2*0x12345678(%edi)
- andb $0x7f,%bl
-
- movb (%esi,%ebx),%al // fetch source texel
- addl %ebp,%edx
- adcb %cl,%bl
- movb (%eax),%dl
-w3: movb %dl,3*0x12345678(%edi)
- andb $0x7f,%bl
-
-w4: addl $4*0x12345678,%edi
-
- decb %ch
- jnz wquadloop
-
-wdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-
-
-
-
-
-
-//----------------------------------------------------------------------
-//
-// R_DrawSpanNoWrap
-//
-// Horizontal texture mapping, does not remap colors,
-// neither needs to wrap around the source texture.
-//
-// Thus, a special optimisation can be used...
-//
-//----------------------------------------------------------------------
-
- .data
-
-advancetable: .long 0, 0
-#if !defined( LINUX) && !defined( __OS2__)
- .text
-#endif
-#ifdef LINUX
- .align 2
-#else
- .align 4
-#endif
-.globl C(R_DrawSpanNoWrap)
-C(R_DrawSpanNoWrap):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
-//
-// find loop count
-//
-
- movl C(ds_x2),%eax
- incl %eax
- subl C(ds_x1),%eax // pixel count
- movl %eax,C(pixelcount) // save for final pixel
- jle htvdone // nothing to scale
-// shrl $1,%eax // double pixel count
-// movl %eax,C(loopcount)
-
-//
-// calculate screen dest
-//
-
- movl C(ds_y),%edi //full destination start address
-
-//
-// set up advancetable
-//
-
- movl C(ds_xstep),%ebp
- movl C(ds_ystep),%ecx
- movl %ecx,%eax
- movl %ebp,%edx
- sarl $16,%edx // xstep >>= 16;
- movl C(vidwidth),%ebx
- sarl $16,%eax // ystep >>= 16;
- jz 0f
- imull %ebx,%eax // (ystep >> 16) * texwidth;
-0:
- addl %edx,%eax // add in xstep
- // (ystep >> 16) * texwidth + (xstep >> 16);
-
- movl %eax,advancetable+4 // advance base in y
- addl %ebx,%eax // ((ystep >> 16) + 1) * texwidth +
- // (xstep >> 16);
- movl %eax,advancetable // advance extra in y
-
- shll $16,%ebp // left-justify xstep fractional part
- movl %ebp,xstep
- shll $16,%ecx // left-justify ystep fractional part
- movl %ecx,ystep
-
-//
-// calculate the texture starting address
-//
- movl C(ds_source),%esi // texture source
-
- movl C(ds_yfrac),%eax
- movl %eax,%edx
- sarl $16,%eax
- movl C(ds_xfrac),%ecx
- imull %ebx,%eax // (yfrac >> 16) * texwidth
- movl %ecx,%ebx
- sarl $16,%ecx
- movl %ecx,%ebp
- addl %eax,%ebp // source = (xfrac >> 16) +
- // ((yfrac >> 16) * texwidth);
-
-//
-// esi : texture source
-// edi : screen dest
-// eax : colormap aligned on 256 boundary, hehehe...
-// ebx : xfrac << 16
-// ecx : used in loop, contains either 0 or -1, *4, offset into advancetable
-// edx : yfrac << 16
-// ebp : offset into texture
-//
-
- shll $16,%edx // yfrac upper word, lower byte will be used
- movl C(ds_colormap),%eax
- shll $16,%ebx // xfrac upper word, lower unused
-
- movl C(pixelcount),%ecx
- shrl $2,%ecx
- movb %cl,%dh // quad pixels count
-
- movl C(pixelcount),%ecx
- andl $3,%ecx
- jz htvquadloop // pixelcount is multiple of 4
- decl %ecx
- jz 1f
- decl %ecx
- jz 2f
-
-//
-// do one to three pixels first
-//
- addl ystep,%edx // yfrac += ystep
- sbbl %ecx,%ecx // turn carry into 0 or -1 if set
- movb (%esi,%ebp),%al // get texture pixel
- addl xstep,%ebx // xfrac += xstep
-// movb (%eax),%dl // pixel goes through colormap
- adcl advancetable+4(,%ecx,4),%ebp // advance source
- movb %al,(%edi) // write pixel dest
-
- incl %edi
-
-2:
- addl ystep,%edx // yfrac += ystep
- sbbl %ecx,%ecx // turn carry into 0 or -1 if set
- movb (%esi,%ebp),%al // get texture pixel
- addl xstep,%ebx // xfrac += xstep
-// movb (%eax),%dl // pixel goes through colormap
- adcl advancetable+4(,%ecx,4),%ebp // advance source
- movb %al,(%edi) // write pixel dest
-
- incl %edi
-
-1:
- addl ystep,%edx // yfrac += ystep
- sbbl %ecx,%ecx // turn carry into 0 or -1 if set
- movb (%esi,%ebp),%al // get texture pixel
- addl xstep,%ebx // xfrac += xstep
-// movb (%eax),%dl // pixel goes through colormap
- adcl advancetable+4(,%ecx,4),%ebp // advance source
- movb %al,(%edi) // write pixel dest
-
- incl %edi
-
-//
-// test if there was at least 4 pixels
-//
- testb $0xFF,%dh
- jz htvdone
-
-//
-// two pixels per loop
-// U
-// V
-htvquadloop:
- addl ystep,%edx // yfrac += ystep
- sbbl %ecx,%ecx // turn carry into 0 or -1 if set
- movb (%esi,%ebp),%al // get texture pixel
- addl xstep,%ebx // xfrac += xstep
-// movb (%eax),%dl // pixel goes through colormap
- adcl advancetable+4(,%ecx,4),%ebp // advance source
- movb %al,(%edi) // write pixel dest
-
- addl ystep,%edx
- sbbl %ecx,%ecx
- movb (%esi,%ebp),%al
- addl xstep,%ebx
-// movb (%eax),%dl
- adcl advancetable+4(,%ecx,4),%ebp
- movb %al,1(%edi)
-
- addl ystep,%edx
- sbbl %ecx,%ecx
- movb (%esi,%ebp),%al
- addl xstep,%ebx
-// movb (%eax),%dl
- adcl advancetable+4(,%ecx,4),%ebp
- movb %al,2(%edi)
-
- addl ystep,%edx
- sbbl %ecx,%ecx
- movb (%esi,%ebp),%al
- addl xstep,%ebx
-// movb (%eax),%dl
- adcl advancetable+4(,%ecx,4),%ebp
- movb %al,3(%edi)
-
- addl $4, %edi
- incl %ecx //dummy
-
- decb %dh
- jnz htvquadloop // paire dans V-pipe
-
-htvdone:
- popl %ebx // restore register variables
- popl %edi
- popl %esi
- popl %ebp // restore caller's stack frame pointer
- ret
-
-
-//.endif
-
-#ifdef HORIZONTALDRAW
-// void R_RotateBuffere (void)
-
-#ifdef LINUX
- .align 2
-#else
- .align 4
-#endif
-.globl C(R_RotateBufferasm)
-C(R_RotateBufferasm):
- pushl %ebp // preserve caller's stack frame pointer
- pushl %esi // preserve register variables
- pushl %edi
- pushl %ebx
-
- movl C(dc_source),%esi
- movl C(dc_colormap),%edi
-
- movl $200,%edx
-ra2:
- movl $40,%ecx
-ra:
- movb -2*200(%esi),%al
- movb -6*200(%esi),%bl
- movb -3*200(%esi),%ah
- movb -7*200(%esi),%bh
- shll $16,%eax
- shll $16,%ebx
- movb (%esi),%al
- movb -4*200(%esi),%bl
- movb -1*200(%esi),%ah
- movb -5*200(%esi),%bh
- movl %eax,(%edi)
- subl $8*200,%esi
- movl %ebx,4(%edi)
- addl $8,%edi
- decl %ecx
- jnz ra
-
- addl $320*200+1,%esi //32*480 passe a la ligne suivante
-// addl 320-32,%edi
-
- decl %edx
- jnz ra2
-
- pop %ebp // preserve caller's stack frame pointer
- pop %esi // preserve register variables
- pop %edi
- pop %ebx
- ret
-#endif
diff --git a/src/tmap_asm.s b/src/tmap_asm.s
deleted file mode 100644
index 99cb0b62..00000000
--- a/src/tmap_asm.s
+++ /dev/null
@@ -1,322 +0,0 @@
-// SONIC ROBO BLAST 2
-//-----------------------------------------------------------------------------
-// Copyright (C) 1998-2000 by DooM Legacy Team.
-// Copyright (C) 1999-2018 by Sonic Team Junior.
-//
-// This program is free software distributed under the
-// terms of the GNU General Public License, version 2.
-// See the 'LICENSE' file for more details.
-//-----------------------------------------------------------------------------
-/// \file tmap_asm.s
-/// \brief ???
-
-//.comm _dc_colormap,4
-//.comm _dc_x,4
-//.comm _dc_yl,4
-//.comm _dc_yh,4
-//.comm _dc_iscale,4
-//.comm _dc_texturemid,4
-//.comm _dc_source,4
-//.comm _ylookup,4
-//.comm _columnofs,4
-//.comm _loopcount,4
-//.comm _pixelcount,4
-.data
-_pixelcount:
-.long 0x00000000
-_loopcount:
-.long 0x00000000
-.align 8
-_mmxcomm:
-.long 0x00000000
-.text
-
- .align 4
-.globl _R_DrawColumn8_NOMMX
-_R_DrawColumn8_NOMMX:
- pushl %ebp
- pushl %esi
- pushl %edi
- pushl %ebx
- movl _dc_yl,%edx
- movl _dc_yh,%eax
- subl %edx,%eax
- leal 1(%eax),%ebx
- testl %ebx,%ebx
- jle rdc8ndone
- movl _dc_x,%eax
- movl _ylookup, %edi
- movl (%edi,%edx,4),%esi
- movl _columnofs, %edi
- addl (%edi,%eax,4),%esi
- movl _dc_iscale,%edi
- movl %edx,%eax
- imull %edi,%eax
- movl _dc_texturemid,%ecx
- addl %eax,%ecx
-
- movl _dc_source,%ebp
- xorl %edx, %edx
- subl $0x12345678, %esi
-.globl rdc8nwidth1
-rdc8nwidth1:
- .align 4,0x90
-rdc8nloop:
- movl %ecx,%eax
- shrl $16,%eax
- addl %edi,%ecx
- andl $127,%eax
- addl $0x12345678,%esi
-.globl rdc8nwidth2
-rdc8nwidth2:
- movb (%eax,%ebp),%dl
- movl _dc_colormap,%eax
- movb (%eax,%edx),%al
- movb %al,(%esi)
- decl %ebx
- jne rdc8nloop
-rdc8ndone:
- popl %ebx
- popl %edi
- popl %esi
- popl %ebp
- ret
-
-//
-// Optimised specifically for P54C/P55C (aka Pentium with/without MMX)
-// By ES 1998/08/01
-//
-
-.globl _R_DrawColumn_8_Pentium
-_R_DrawColumn_8_Pentium:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
- movl _dc_yl,%eax // Top pixel
- movl _dc_yh,%ebx // Bottom pixel
- movl _ylookup, %edi
- movl (%edi,%ebx,4),%ecx
- subl %eax,%ebx // ebx=number of pixels-1
- jl rdc8pdone // no pixel to draw, done
- jnz rdc8pmany
- movl _dc_x,%edx // Special case: only one pixel
- movl _columnofs, %edi
- addl (%edi,%edx,4),%ecx // dest pixel at (%ecx)
- movl _dc_iscale,%esi
- imull %esi,%eax
- movl _dc_texturemid,%edi
- addl %eax,%edi // texture index in edi
- movl _dc_colormap,%edx
- shrl $16, %edi
- movl _dc_source,%ebp
- andl $127,%edi
- movb (%edi,%ebp),%dl // read texture pixel
- movb (%edx),%al // lookup for light
- movb %al,0(%ecx) // write it
- jmp rdc8pdone // done!
-.align 4, 0x90
-rdc8pmany: // draw >1 pixel
- movl _dc_x,%edx
- movl _columnofs, %edi
- movl (%edi,%edx,4),%edx
- leal 0x12345678(%edx, %ecx), %edi // edi = two pixels above bottom
-.globl rdc8pwidth5
-rdc8pwidth5: // DeadBeef = -2*SCREENWIDTH
- movl _dc_iscale,%edx // edx = fracstep
- imull %edx,%eax
- shll $9, %edx // fixme: Should get 7.25 fix as input
- movl _dc_texturemid,%ecx
- addl %eax,%ecx // ecx = frac
- movl _dc_colormap,%eax // eax = lighting/special effects LUT
- shll $9, %ecx
- movl _dc_source,%esi // esi = source ptr
-
- imull $0x12345678, %ebx // ebx = negative offset to pixel
-.globl rdc8pwidth6
-rdc8pwidth6: // DeadBeef = -SCREENWIDTH
-
-// Begin the calculation of the two first pixels
- leal (%ecx, %edx), %ebp
- shrl $25, %ecx
- movb (%esi, %ecx), %al
- leal (%edx, %ebp), %ecx
- shrl $25, %ebp
- movb (%eax), %dl
-
-// The main loop
-rdc8ploop:
- movb (%esi,%ebp), %al // load 1
- leal (%ecx, %edx), %ebp // calc frac 3
-
- shrl $25, %ecx // shift frac 2
- movb %dl, 0x12345678(%edi, %ebx)// store 0
-.globl rdc8pwidth1
-rdc8pwidth1: // DeadBeef = 2*SCREENWIDTH
-
- movb (%eax), %al // lookup 1
-
- movb %al, 0x12345678(%edi, %ebx)// store 1
-.globl rdc8pwidth2
-rdc8pwidth2: // DeadBeef = 3*SCREENWIDTH
- movb (%esi, %ecx), %al // load 2
-
- leal (%ebp, %edx), %ecx // calc frac 4
-
- shrl $25, %ebp // shift frac 3
- movb (%eax), %dl // lookup 2
-
- addl $0x12345678, %ebx // counter
-.globl rdc8pwidth3
-rdc8pwidth3: // DeadBeef = 2*SCREENWIDTH
- jl rdc8ploop // loop
-
-// End of loop. Write extra pixel or just exit.
- jnz rdc8pdone
- movb %dl, 0x12345678(%edi, %ebx)// Write odd pixel
-.globl rdc8pwidth4
-rdc8pwidth4: // DeadBeef = 2*SCREENWIDTH
-
-rdc8pdone:
-
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-
-//
-// MMX asm version, optimised for K6
-// By ES 1998/07/05
-//
-
-.globl _R_DrawColumn_8_K6_MMX
-_R_DrawColumn_8_K6_MMX:
- pushl %ebp
- pushl %ebx
- pushl %esi
- pushl %edi
-
- movl %esp, %eax // Push 8 or 12, so that (%esp) gets aligned by 8
- andl $7,%eax
- addl $8,%eax
- movl %eax, _mmxcomm // Temp storage in mmxcomm: (%esp) is used instead
- subl %eax,%esp
-
- movl _dc_yl,%edx // Top pixel
- movl _dc_yh,%ebx // Bottom pixel
- movl _ylookup, %edi
- movl (%edi,%ebx,4),%ecx
- subl %edx,%ebx // ebx=number of pixels-1
- jl 0x12345678 // no pixel to draw, done
-.globl rdc8moffs1
-rdc8moffs1:
- jnz rdc8mmany
- movl _dc_x,%eax // Special case: only one pixel
- movl _columnofs, %edi
- addl (%edi,%eax,4),%ecx // dest pixel at (%ecx)
- movl _dc_iscale,%esi
- imull %esi,%edx
- movl _dc_texturemid,%edi
- addl %edx,%edi // texture index in edi
- movl _dc_colormap,%edx
- shrl $16, %edi
- movl _dc_source,%ebp
- andl $127,%edi
- movb (%edi,%ebp),%dl // read texture pixel
- movb (%edx),%al // lookup for light
- movb %al,0(%ecx) // write it
- jmp rdc8mdone // done!
-.globl rdc8moffs2
-rdc8moffs2:
-.align 4, 0x90
-rdc8mmany: // draw >1 pixel
- movl _dc_x,%eax
- movl _columnofs, %edi
- movl (%edi,%eax,4),%eax
- leal 0x12345678(%eax, %ecx), %esi // esi = two pixels above bottom
-.globl rdc8mwidth3
-rdc8mwidth3: // DeadBeef = -2*SCREENWIDTH
- movl _dc_iscale,%ecx // ecx = fracstep
- imull %ecx,%edx
- shll $9, %ecx // fixme: Should get 7.25 fix as input
- movl _dc_texturemid,%eax
- addl %edx,%eax // eax = frac
- movl _dc_colormap,%edx // edx = lighting/special effects LUT
- shll $9, %eax
- leal (%ecx, %ecx), %edi
- movl _dc_source,%ebp // ebp = source ptr
- movl %edi, 0(%esp) // Start moving frac and fracstep to MMX regs
-
- imull $0x12345678, %ebx // ebx = negative offset to pixel
-.globl rdc8mwidth5
-rdc8mwidth5: // DeadBeef = -SCREENWIDTH
-
- movl %edi, 4(%esp)
- leal (%eax, %ecx), %edi
- movq 0(%esp), %mm1 // fracstep:fracstep in mm1
- movl %eax, 0(%esp)
- shrl $25, %eax
- movl %edi, 4(%esp)
- movzbl (%ebp, %eax), %eax
- movq 0(%esp), %mm0 // frac:frac in mm0
-
- paddd %mm1, %mm0
- shrl $25, %edi
- movq %mm0, %mm2
- psrld $25, %mm2 // texture index in mm2
- paddd %mm1, %mm0
- movq %mm2, 0(%esp)
-
-.globl rdc8mloop
-rdc8mloop: // The main loop
- movq %mm0, %mm2 // move 4-5 to temp reg
- movzbl (%ebp, %edi), %edi // read 1
-
- psrld $25, %mm2 // shift 4-5
- movb (%edx,%eax), %cl // lookup 0
-
- movl 0(%esp), %eax // load 2
- addl $0x12345678, %ebx // counter
-.globl rdc8mwidth2
-rdc8mwidth2: // DeadBeef = 2*SCREENWIDTH
-
- movb %cl, (%esi, %ebx) // write 0
- movb (%edx,%edi), %ch // lookup 1
-
- movb %ch, 0x12345678(%esi, %ebx) // write 1
-.globl rdc8mwidth1
-rdc8mwidth1: // DeadBeef = SCREENWIDTH
- movl 4(%esp), %edi // load 3
-
- paddd %mm1, %mm0 // frac 6-7
- movzbl (%ebp, %eax), %eax // lookup 2
-
- movq %mm2, 0(%esp) // store texture index 4-5
- jl rdc8mloop
-
- jnz rdc8mno_odd
- movb (%edx,%eax), %cl // write the last odd pixel
- movb %cl, 0x12345678(%esi)
-.globl rdc8mwidth4
-rdc8mwidth4: // DeadBeef = 2*SCREENWIDTH
-rdc8mno_odd:
-
-.globl rdc8mdone
-rdc8mdone:
- emms
-
- addl _mmxcomm, %esp
- popl %edi
- popl %esi
- popl %ebx
- popl %ebp
- ret
-
-// Need some extra space to align run-time
-.globl R_DrawColumn_8_K6_MMX_end
-R_DrawColumn_8_K6_MMX_end:
-nop;nop;nop;nop;nop;nop;nop;nop;
-nop;nop;nop;nop;nop;nop;nop;nop;
-nop;nop;nop;nop;nop;nop;nop;nop;
-nop;nop;nop;nop;nop;nop;nop;
diff --git a/src/tmap_mmx.nas b/src/tmap_mmx.nas
deleted file mode 100644
index 39380a06..00000000
--- a/src/tmap_mmx.nas
+++ /dev/null
@@ -1,674 +0,0 @@
-;; SONIC ROBO BLAST 2
-;;-----------------------------------------------------------------------------
-;; Copyright (C) 1998-2000 by DOSDOOM.
-;; Copyright (C) 2010-2018 by Sonic Team Junior.
-;;
-;; This program is free software distributed under the
-;; terms of the GNU General Public License, version 2.
-;; See the 'LICENSE' file for more details.
-;;-----------------------------------------------------------------------------
-;; FILE:
-;; tmap_mmx.nas
-;; DESCRIPTION:
-;; Assembler optimised rendering code for software mode, using SIMD
-;; instructions.
-;; Draw wall columns.
-
-
-[BITS 32]
-
-%define FRACBITS 16
-%define TRANSPARENTPIXEL 247
-
-%ifdef LINUX
-%macro cextern 1
-[extern %1]
-%endmacro
-
-%macro cglobal 1
-[global %1]
-%endmacro
-
-%else
-%macro cextern 1
-%define %1 _%1
-[extern %1]
-%endmacro
-
-%macro cglobal 1
-%define %1 _%1
-[global %1]
-%endmacro
-
-%endif
-
-
-; The viddef_s structure. We only need the width field.
-struc viddef_s
- resb 12
-.width: resb 4
- resb 44
-endstruc
-
-
-;; externs
-;; columns
-cextern dc_colormap
-cextern dc_x
-cextern dc_yl
-cextern dc_yh
-cextern dc_iscale
-cextern dc_texturemid
-cextern dc_texheight
-cextern dc_source
-cextern dc_hires
-cextern centery
-cextern centeryfrac
-cextern dc_transmap
-
-cextern R_DrawColumn_8_ASM
-cextern R_Draw2sMultiPatchColumn_8_ASM
-
-;; spans
-cextern nflatshiftup
-cextern nflatxshift
-cextern nflatyshift
-cextern nflatmask
-cextern ds_xfrac
-cextern ds_yfrac
-cextern ds_xstep
-cextern ds_ystep
-cextern ds_x1
-cextern ds_x2
-cextern ds_y
-cextern ds_source
-cextern ds_colormap
-
-cextern ylookup
-cextern columnofs
-cextern vid
-
-[SECTION .data]
-
-nflatmask64 dq 0
-
-
-[SECTION .text]
-
-;;----------------------------------------------------------------------
-;;
-;; R_DrawColumn : 8bpp column drawer
-;;
-;; MMX column drawer.
-;;
-;;----------------------------------------------------------------------
-;; eax = accumulator
-;; ebx = colormap
-;; ecx = count
-;; edx = accumulator
-;; esi = source
-;; edi = dest
-;; ebp = vid.width
-;; mm0 = accumulator
-;; mm1 = heightmask, twice
-;; mm2 = 2 * fracstep, twice
-;; mm3 = pair of consecutive fracs
-;;----------------------------------------------------------------------
-
-
-cglobal R_DrawColumn_8_MMX
-R_DrawColumn_8_MMX:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-
-;;
-;; Our algorithm requires that the texture height be a power of two.
-;; If not, fall back to the non-MMX drawer.
-;;
-.texheightcheck:
- mov edx, [dc_texheight]
- sub edx, 1 ;; edx = heightmask
- test edx, [dc_texheight]
- jnz near .usenonMMX
-
- mov ebp, edx ;; Keep a copy of heightmask in a
- ;; GPR for the time being.
-
-;;
-;; Fill mm1 with heightmask
-;;
- movd mm1, edx ;; low dword = heightmask
- punpckldq mm1, mm1 ;; copy low dword to high dword
-
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov eax, [dc_yl]
- mov edi, [ylookup+eax*4]
- mov ebx, [dc_x]
- add edi, [columnofs+ebx*4] ;; edi = dest
-
-
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov ecx, [dc_yh]
- add ecx, 1
- sub ecx, eax ;; pixel count
- jle near .done ;; nothing to scale
-
-;;
-;; fracstep = dc_iscale;
-;;
- movd mm2, [dc_iscale] ;; fracstep in low dword
- punpckldq mm2, mm2 ;; copy to high dword
-
- mov ebx, [dc_colormap]
- mov esi, [dc_source]
-
-;;
-;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
-;;
- ;; eax == dc_yl already
- shl eax, FRACBITS
- sub eax, [centeryfrac]
- imul dword [dc_iscale]
- shrd eax, edx, FRACBITS
- add eax, [dc_texturemid]
-
-;;
-;; if (dc_hires) frac = 0;
-;;
- test byte [dc_hires], 0x01
- jz .mod2
- xor eax, eax
-
-
-;;
-;; Do mod-2 pixel.
-;;
-.mod2:
- test ecx, 1
- jz .pairprepare
- mov edx, eax ;; edx = frac
- add eax, [dc_iscale] ;; eax += fracstep
- sar edx, FRACBITS
- and edx, ebp ;; edx &= heightmask
- movzx edx, byte [esi + edx]
- movzx edx, byte [ebx + edx]
- mov [edi], dl
-
- add edi, [vid + viddef_s.width]
- sub ecx, 1
- jz .done
-
-.pairprepare:
-;;
-;; Prepare for the main loop.
-;;
- movd mm3, eax ;; Low dword = frac
- movq mm4, mm3 ;; Copy to intermediate register
- paddd mm4, mm2 ;; dwords of mm4 += fracstep
- punpckldq mm3, mm4 ;; Low dword = first frac, high = second
- pslld mm2, 1 ;; fracstep *= 2
-
-;;
-;; ebp = vid.width
-;;
- mov ebp, [vid + viddef_s.width]
-
- align 16
-.pairloop:
- movq mm0, mm3 ;; 3B 1u.
- psrad mm0, FRACBITS ;; 4B 1u.
- pand mm0, mm1 ;; 3B 1u. frac &= heightmask
- paddd mm3, mm2 ;; 3B 1u. frac += fracstep
-
- movd eax, mm0 ;; 3B 1u. Get first frac
-;; IFETCH boundary
- movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
- movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
-
- punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
- movd edx, mm0 ;; 3B 1u. Get second frac
- mov [edi], al ;; 2B 1(2)u. First pixel
-;; IFETCH boundary
-
- movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
- movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
- mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
-
- lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
-;; IFETCH boundary
- sub ecx, 2 ;; 3B 1u. count -= 2
- jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
-
-
-.done:
-;;
-;; Clear MMX state, or else FPU operations will go badly awry.
-;;
- emms
-
- pop ebx
- pop edi
- pop esi
- pop ebp
- ret
-
-.usenonMMX:
- call R_DrawColumn_8_ASM
- jmp .done
-
-
-;;----------------------------------------------------------------------
-;;
-;; R_Draw2sMultiPatchColumn : Like R_DrawColumn, but omits transparent
-;; pixels.
-;;
-;; MMX column drawer.
-;;
-;;----------------------------------------------------------------------
-;; eax = accumulator
-;; ebx = colormap
-;; ecx = count
-;; edx = accumulator
-;; esi = source
-;; edi = dest
-;; ebp = vid.width
-;; mm0 = accumulator
-;; mm1 = heightmask, twice
-;; mm2 = 2 * fracstep, twice
-;; mm3 = pair of consecutive fracs
-;;----------------------------------------------------------------------
-
-
-cglobal R_Draw2sMultiPatchColumn_8_MMX
-R_Draw2sMultiPatchColumn_8_MMX:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-
-;;
-;; Our algorithm requires that the texture height be a power of two.
-;; If not, fall back to the non-MMX drawer.
-;;
-.texheightcheck:
- mov edx, [dc_texheight]
- sub edx, 1 ;; edx = heightmask
- test edx, [dc_texheight]
- jnz near .usenonMMX
-
- mov ebp, edx ;; Keep a copy of heightmask in a
- ;; GPR for the time being.
-
-;;
-;; Fill mm1 with heightmask
-;;
- movd mm1, edx ;; low dword = heightmask
- punpckldq mm1, mm1 ;; copy low dword to high dword
-
-;;
-;; dest = ylookup[dc_yl] + columnofs[dc_x];
-;;
- mov eax, [dc_yl]
- mov edi, [ylookup+eax*4]
- mov ebx, [dc_x]
- add edi, [columnofs+ebx*4] ;; edi = dest
-
-
-;;
-;; pixelcount = yh - yl + 1
-;;
- mov ecx, [dc_yh]
- add ecx, 1
- sub ecx, eax ;; pixel count
- jle near .done ;; nothing to scale
-;;
-;; fracstep = dc_iscale;
-;;
- movd mm2, [dc_iscale] ;; fracstep in low dword
- punpckldq mm2, mm2 ;; copy to high dword
-
- mov ebx, [dc_colormap]
- mov esi, [dc_source]
-
-;;
-;; frac = (dc_texturemid + FixedMul((dc_yl << FRACBITS) - centeryfrac, fracstep));
-;;
- ;; eax == dc_yl already
- shl eax, FRACBITS
- sub eax, [centeryfrac]
- imul dword [dc_iscale]
- shrd eax, edx, FRACBITS
- add eax, [dc_texturemid]
-
-;;
-;; if (dc_hires) frac = 0;
-;;
- test byte [dc_hires], 0x01
- jz .mod2
- xor eax, eax
-
-
-;;
-;; Do mod-2 pixel.
-;;
-.mod2:
- test ecx, 1
- jz .pairprepare
- mov edx, eax ;; edx = frac
- add eax, [dc_iscale] ;; eax += fracstep
- sar edx, FRACBITS
- and edx, ebp ;; edx &= heightmask
- movzx edx, byte [esi + edx]
- cmp dl, TRANSPARENTPIXEL
- je .nextmod2
- movzx edx, byte [ebx + edx]
- mov [edi], dl
-
-.nextmod2:
- add edi, [vid + viddef_s.width]
- sub ecx, 1
- jz .done
-
-.pairprepare:
-;;
-;; Prepare for the main loop.
-;;
- movd mm3, eax ;; Low dword = frac
- movq mm4, mm3 ;; Copy to intermediate register
- paddd mm4, mm2 ;; dwords of mm4 += fracstep
- punpckldq mm3, mm4 ;; Low dword = first frac, high = second
- pslld mm2, 1 ;; fracstep *= 2
-
-;;
-;; ebp = vid.width
-;;
- mov ebp, [vid + viddef_s.width]
-
- align 16
-.pairloop:
- movq mm0, mm3 ;; 3B 1u.
- psrad mm0, FRACBITS ;; 4B 1u.
- pand mm0, mm1 ;; 3B 1u. frac &= heightmask
- paddd mm3, mm2 ;; 3B 1u. frac += fracstep
-
- movd eax, mm0 ;; 3B 1u. Get first frac
-;; IFETCH boundary
- movzx eax, byte [esi + eax] ;; 4B 1u. Texture map
- punpckhdq mm0, mm0 ;; 3B 1(2)u. low dword = high dword
- movd edx, mm0 ;; 3B 1u. Get second frac
- cmp al, TRANSPARENTPIXEL ;; 2B 1u.
- je .secondinpair ;; 2B 1u.
-;; IFETCH boundary
- movzx eax, byte [ebx + eax] ;; 4B 1u. Colormap
- mov [edi], al ;; 2B 1(2)u. First pixel
-
-.secondinpair:
- movzx edx, byte [esi + edx] ;; 4B 1u. Texture map
- cmp dl, TRANSPARENTPIXEL ;; 2B 1u.
- je .nextpair ;; 2B 1u.
-;; IFETCH boundary
- movzx edx, byte [ebx + edx] ;; 4B 1u. Colormap
- mov [edi + 1*ebp], dl ;; 3B 1(2)u. Second pixel
-
-.nextpair:
- lea edi, [edi + 2*ebp] ;; 3B 1u. edi += 2 * vid.width
- sub ecx, 2 ;; 3B 1u. count -= 2
- jnz .pairloop ;; 2B 1u. if(count != 0) goto .pairloop
-
-
-.done:
-;;
-;; Clear MMX state, or else FPU operations will go badly awry.
-;;
- emms
-
- pop ebx
- pop edi
- pop esi
- pop ebp
- ret
-
-.usenonMMX:
- call R_Draw2sMultiPatchColumn_8_ASM
- jmp .done
-
-
-;;----------------------------------------------------------------------
-;;
-;; R_DrawSpan : 8bpp span drawer
-;;
-;; MMX span drawer.
-;;
-;;----------------------------------------------------------------------
-;; eax = accumulator
-;; ebx = colormap
-;; ecx = count
-;; edx = accumulator
-;; esi = source
-;; edi = dest
-;; ebp = two pixels
-;; mm0 = accumulator
-;; mm1 = xposition
-;; mm2 = yposition
-;; mm3 = 2 * xstep
-;; mm4 = 2 * ystep
-;; mm5 = nflatxshift
-;; mm6 = nflatyshift
-;; mm7 = accumulator
-;;----------------------------------------------------------------------
-
-cglobal R_DrawSpan_8_MMX
-R_DrawSpan_8_MMX:
- push ebp ;; preserve caller's stack frame pointer
- push esi ;; preserve register variables
- push edi
- push ebx
-
-;;
-;; esi = ds_source
-;; ebx = ds_colormap
-;;
- mov esi, [ds_source]
- mov ebx, [ds_colormap]
-
-;;
-;; edi = ylookup[ds_y] + columnofs[ds_x1]
-;;
- mov eax, [ds_y]
- mov edi, [ylookup + eax*4]
- mov edx, [ds_x1]
- add edi, [columnofs + edx*4]
-
-;;
-;; ecx = ds_x2 - ds_x1 + 1
-;;
- mov ecx, [ds_x2]
- sub ecx, edx
- add ecx, 1
-
-;;
-;; Needed for fracs and steps
-;;
- movd mm7, [nflatshiftup]
-
-;;
-;; mm3 = xstep
-;;
- movd mm3, [ds_xstep]
- pslld mm3, mm7
- punpckldq mm3, mm3
-
-;;
-;; mm4 = ystep
-;;
- movd mm4, [ds_ystep]
- pslld mm4, mm7
- punpckldq mm4, mm4
-
-;;
-;; mm1 = pair of consecutive xpositions
-;;
- movd mm1, [ds_xfrac]
- pslld mm1, mm7
- movq mm6, mm1
- paddd mm6, mm3
- punpckldq mm1, mm6
-
-;;
-;; mm2 = pair of consecutive ypositions
-;;
- movd mm2, [ds_yfrac]
- pslld mm2, mm7
- movq mm6, mm2
- paddd mm6, mm4
- punpckldq mm2, mm6
-
-;;
-;; mm5 = nflatxshift
-;; mm6 = nflatyshift
-;;
- movd mm5, [nflatxshift]
- movd mm6, [nflatyshift]
-
-;;
-;; Mask is in memory due to lack of registers.
-;;
- mov eax, [nflatmask]
- mov [nflatmask64], eax
- mov [nflatmask64 + 4], eax
-
-
-;;
-;; Go until we reach a dword boundary.
-;;
-.unaligned:
- test edi, 3
- jz .alignedprep
-.stragglers:
- cmp ecx, 0
- je .done ;; If ecx == 0, we're finished.
-
-;;
-;; eax = ((yposition >> nflatyshift) & nflatmask) | (xposition >> nflatxshift)
-;;
- movq mm0, mm1 ;; mm0 = xposition
- movq mm7, mm2 ;; mm7 = yposition
- paddd mm1, mm3 ;; xposition += xstep (once!)
- paddd mm2, mm4 ;; yposition += ystep (once!)
- psrld mm0, mm5 ;; shift
- psrld mm7, mm6 ;; shift
- pand mm7, [nflatmask64] ;; mask
- por mm0, mm7 ;; or x and y together
-
- movd eax, mm0 ;; eax = index of first pixel
- movzx eax, byte [esi + eax] ;; al = source[eax]
- movzx eax, byte [ebx + eax] ;; al = colormap[al]
-
- mov [edi], al
- add edi, 1
-
- sub ecx, 1
- jmp .unaligned
-
-
-.alignedprep:
-;;
-;; We can double the steps now.
-;;
- pslld mm3, 1
- pslld mm4, 1
-
-
-;;
-;; Generate chunks of four pixels.
-;;
-.alignedloop:
-
-;;
-;; Make sure we have at least four pixels.
-;;
- cmp ecx, 4
- jl .prestragglers
-
-;;
-;; First two pixels.
-;;
- movq mm0, mm1 ;; mm0 = xposition
- movq mm7, mm2 ;; mm7 = yposition
- paddd mm1, mm3 ;; xposition += xstep
- paddd mm2, mm4 ;; yposition += ystep
- psrld mm0, mm5 ;; shift
- psrld mm7, mm6 ;; shift
- pand mm7, [nflatmask64] ;; mask
- por mm0, mm7 ;; or x and y together
-
- movd eax, mm0 ;; eax = index of first pixel
- movzx eax, byte [esi + eax] ;; al = source[eax]
- movzx ebp, byte [ebx + eax] ;; ebp = colormap[al]
-
- punpckhdq mm0, mm0 ;; both dwords = high dword
- movd eax, mm0 ;; eax = index of second pixel
- movzx eax, byte [esi + eax] ;; al = source[eax]
- movzx eax, byte [ebx + eax] ;; al = colormap[al]
- shl eax, 8 ;; get pixel in right byte
- or ebp, eax ;; put pixel in ebp
-
-;;
-;; Next two pixels.
-;;
- movq mm0, mm1 ;; mm0 = xposition
- movq mm7, mm2 ;; mm7 = yposition
- paddd mm1, mm3 ;; xposition += xstep
- paddd mm2, mm4 ;; yposition += ystep
- psrld mm0, mm5 ;; shift
- psrld mm7, mm6 ;; shift
- pand mm7, [nflatmask64] ;; mask
- por mm0, mm7 ;; or x and y together
-
- movd eax, mm0 ;; eax = index of third pixel
- movzx eax, byte [esi + eax] ;; al = source[eax]
- movzx eax, byte [ebx + eax] ;; al = colormap[al]
- shl eax, 16 ;; get pixel in right byte
- or ebp, eax ;; put pixel in ebp
-
- punpckhdq mm0, mm0 ;; both dwords = high dword
- movd eax, mm0 ;; eax = index of second pixel
- movzx eax, byte [esi + eax] ;; al = source[eax]
- movzx eax, byte [ebx + eax] ;; al = colormap[al]
- shl eax, 24 ;; get pixel in right byte
- or ebp, eax ;; put pixel in ebp
-
-;;
-;; Write pixels.
-;;
- mov [edi], ebp
- add edi, 4
-
- sub ecx, 4
- jmp .alignedloop
-
-.prestragglers:
-;;
-;; Back to one step at a time.
-;;
- psrad mm3, 1
- psrad mm4, 1
- jmp .stragglers
-
-.done:
-;;
-;; Clear MMX state, or else FPU operations will go badly awry.
-;;
- emms
-
- pop ebx
- pop edi
- pop esi
- pop ebp
- ret
diff --git a/src/tmap_vc.nas b/src/tmap_vc.nas
deleted file mode 100644
index e943d48d..00000000
--- a/src/tmap_vc.nas
+++ /dev/null
@@ -1,48 +0,0 @@
-;; SONIC ROBO BLAST 2
-;;-----------------------------------------------------------------------------
-;; Copyright (C) 1998-2000 by DooM Legacy Team.
-;; Copyright (C) 1999-2018 by Sonic Team Junior.
-;;
-;; This program is free software distributed under the
-;; terms of the GNU General Public License, version 2.
-;; See the 'LICENSE' file for more details.
-;;-----------------------------------------------------------------------------
-;; FILE:
-;; tmap_vc.nas
-;; DESCRIPTION:
-;; Assembler optimised math code for Visual C++.
-
-
-[BITS 32]
-
-%macro cglobal 1
-%define %1 _%1
-[global %1]
-%endmacro
-
-[SECTION .text write]
-
-;----------------------------------------------------------------------------
-;fixed_t FixedMul (fixed_t a, fixed_t b)
-;----------------------------------------------------------------------------
-cglobal FixedMul
-; align 16
-FixedMul:
- mov eax,[esp+4]
- imul dword [esp+8]
- shrd eax,edx,16
- ret
-
-;----------------------------------------------------------------------------
-;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
-;----------------------------------------------------------------------------
-cglobal FixedDiv2
-; align 16
-FixedDiv2:
- mov eax,[esp+4]
- mov edx,eax ;; these two instructions allow the next
- sar edx,31 ;; two to pair, on the Pentium processor.
- shld edx,eax,16
- sal eax,16
- idiv dword [esp+8]
- ret
diff --git a/src/v_video.c b/src/v_video.c
index 09a473ed..0046aae7 100644
--- a/src/v_video.c
+++ b/src/v_video.c
@@ -266,12 +266,6 @@ static void CV_Gammaxxx_ONChange(void)
#endif
-#if defined (__GNUC__) && defined (__i386__) && !defined (NOASM) && !defined (__APPLE__) && !defined (NORUSEASM)
-void VID_BlitLinearScreen_ASM(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
- size_t destrowbytes);
-#define HAVE_VIDCOPY
-#endif
-
static void CV_constextsize_OnChange(void)
{
con_recalc = true;
@@ -284,9 +278,6 @@ static void CV_constextsize_OnChange(void)
void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT32 height, size_t srcrowbytes,
size_t destrowbytes)
{
-#ifdef HAVE_VIDCOPY
- VID_BlitLinearScreen_ASM(srcptr,destptr,width,height,srcrowbytes,destrowbytes);
-#else
if ((srcrowbytes == destrowbytes) && (srcrowbytes == (size_t)width))
M_Memcpy(destptr, srcptr, srcrowbytes * height);
else
@@ -299,7 +290,6 @@ void VID_BlitLinearScreen(const UINT8 *srcptr, UINT8 *destptr, INT32 width, INT3
srcptr += srcrowbytes;
}
}
-#endif
}
static UINT8 hudplusalpha[11] = { 10, 8, 6, 4, 2, 0, 0, 0, 0, 0, 0};
diff --git a/src/vid_copy.s b/src/vid_copy.s
deleted file mode 100644
index 050a8099..00000000
--- a/src/vid_copy.s
+++ /dev/null
@@ -1,61 +0,0 @@
-// SONIC ROBO BLAST 2
-//-----------------------------------------------------------------------------
-// Copyright (C) 1998-2000 by DooM Legacy Team.
-// Copyright (C) 1999-2018 by Sonic Team Junior.
-//
-// This program is free software distributed under the
-// terms of the GNU General Public License, version 2.
-// See the 'LICENSE' file for more details.
-//-----------------------------------------------------------------------------
-/// \file vid_copy.s
-/// \brief code for updating the linear frame buffer screen.
-
-#include "asm_defs.inc" // structures, must match the C structures!
-
-// DJGPPv2 is as fast as this one, but then someone may compile with a less
-// good version of DJGPP than mine, so this little asm will do the trick!
-
-#define srcptr 4+16
-#define destptr 8+16
-#define width 12+16
-#define height 16+16
-#define srcrowbytes 20+16
-#define destrowbytes 24+16
-
-// VID_BlitLinearScreen( src, dest, width, height, srcwidth, destwidth );
-// width is given as BYTES
-
-#ifdef __i386__
-
-.globl C(VID_BlitLinearScreen_ASM)
-C(VID_BlitLinearScreen_ASM):
- pushl %ebp // preserve caller's stack frame
- pushl %edi
- pushl %esi // preserve register variables
- pushl %ebx
-
- cld
- movl srcptr(%esp),%esi
- movl destptr(%esp),%edi
- movl width(%esp),%ebx
- movl srcrowbytes(%esp),%eax
- subl %ebx,%eax
- movl destrowbytes(%esp),%edx
- subl %ebx,%edx
- shrl $2,%ebx
- movl height(%esp),%ebp
-LLRowLoop:
- movl %ebx,%ecx
- rep/movsl (%esi),(%edi)
- addl %eax,%esi
- addl %edx,%edi
- decl %ebp
- jnz LLRowLoop
-
- popl %ebx // restore register variables
- popl %esi
- popl %edi
- popl %ebp // restore the caller's stack frame
-
- ret
-#endif
diff --git a/src/win32/Makefile.cfg b/src/win32/Makefile.cfg
index 71e8a962..94de3ba5 100644
--- a/src/win32/Makefile.cfg
+++ b/src/win32/Makefile.cfg
@@ -20,10 +20,6 @@ else
SDL_LDFLAGS?=-L../libs/SDL2/i686-w64-mingw32/lib -L../libs/SDL2_mixer/i686-w64-mingw32/lib -lmingw32 -lSDL2main -lSDL2 -mwindows
endif
-ifndef NOASM
- USEASM=1
-endif
-
ifndef NONET
ifndef MINGW64 #miniupnc is broken with MINGW64
HAVE_MINIUPNPC=1
diff --git a/tools/anglechk.c b/tools/anglechk.c
index 4a67069b..7f56abff 100644
--- a/tools/anglechk.c
+++ b/tools/anglechk.c
@@ -22,7 +22,6 @@
#ifdef _MSC_VER
#include
#endif
-#define NOASM
#include "../src/tables.h"
#define NO_M
#include "../src/m_fixed.c"