quakeforge/libs/video/renderer/sw/d_parta.S

/*
	d_parta.S

	x86 assembly-language 8-bpp particle-drawing code.

	Copyright (C) 1996-1997  Id Software, Inc.

	This program is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

	See the GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program; if not, write to:

		Free Software Foundation, Inc.
		59 Temple Place - Suite 330
		Boston, MA  02111-1307, USA

	$Id$
*/

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#ifdef PIC
#undef USE_INTEL_ASM //XXX asm pic hack
#endif

#ifdef USE_INTEL_ASM

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

//----------------------------------------------------------------------
// 8-bpp particle queueing code.
//----------------------------------------------------------------------

	.text

// Byte offset from %esp to the first stack argument once the three pushl's
// of the prologue below have run: 3 saved registers (12 bytes) plus the
// return address (4 bytes).
#define P	12+4

//----------------------------------------------------------------------
// D_DrawParticle
//
// Transforms one particle into view space, projects it to screen
// coordinates, rejects it when it is nearer than float_particle_z_clip or
// outside the particle view rectangle, and otherwise works out the
// frame-buffer address, z-buffer address and particle size before jumping
// to the matching drawing code.
//
// In:		first stack argument = pointer to the particle.  Three floats are
//			read at pt_org and one 32-bit value at pt_color (both offsets are
//			defined outside this file).
// Saves:	%ebp, %edi and %ebx are pushed here and popped again at LDone.
// Uses:	%eax, %ecx, %edx and the x87 stack.  C(DP_u), C(DP_v),
//			C(DP_Color) and C(izi) are written and read back.
//			Values still on the x87 stack at the early exits are popped by
//			LPop6AndDone / LPop1AndDone.
//
// Register state at the jumps that leave this section for the drawing code:
//			%eax = pix (size after clamping to d_pix_min..d_pix_max)
//			%ebx = d_y_aspect_shift
//			%ch  = low byte of DP_Color
//			%edx = address of the particle's z-buffer entry
//			%edi = address of the particle's frame-buffer pixel
//			%ebp = izi
//----------------------------------------------------------------------
	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer (first argument)

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)			// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4	// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8	// r_ppn[2] | dot1 | dot0 | local[0] |
						//  local[1] | local[2]
	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
						  //  local[2]
	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
	fld		%st(0)		// z | z | local[0] | local[1] |
						//  local[2]
	fdivrs	C(float_1)	// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
	fcomps	C(float_particle_z_clip)	// 1/z | local[0] | local[1] | local[2]
	fnstsw	%ax						// latch the compare result immediately:
									//  fxch/fld/fmul leave C0 architecturally
									//  undefined, so it must not be read
									//  after them.  %ax stays untouched
									//  until the testb below.
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

// start on transformed[1] while the compare result waits in %ax
	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
						//  local[1] | 1/z

	testb	$1,%ah			// C0 (status-word bit 8): set when z < clip
							//  or the compare was unordered
	jnz		LPop6AndDone	// six values are on the x87 stack here

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
						//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
						//  local[1] | 1/z
	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
						//  local[1] | 1/z
	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1)	// x | y | 1/z
	fxch	%st(1)			// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)	// y/z | x | 1/z
	fxch	%st(1)			// x | y/z | 1/z
	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
	fxch	%st(1)			// y/z | x/z | 1/z
	fsubrs	C(ycenter)		// v | x/z | 1/z		(v = ycenter - y/z)
	fxch	%st(1)			// x/z | v | 1/z
	fadds	C(xcenter)		// u | v | 1/z			(u = xcenter + x/z)
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)			// v | u | 1/z
	fadds	C(float_point5)	// v + 0.5 | u | 1/z
	fxch	%st(1)			// u | v | 1/z
	fadds	C(float_point5)	// u + 0.5 | v | 1/z
	fxch	%st(2)			// 1/z | v | u
	fmuls	C(DP_32768)		// 1/z * 0x8000 | v | u
	fxch	%st(2)			// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	C(DP_u)			// v | 1/z * 0x8000
	fistpl	C(DP_v)			// 1/z * 0x8000

	movl	C(DP_u),%eax	// %eax = u (integer screen x)
	movl	C(DP_v),%edx	// %edx = v (integer screen y)

// if ((v > d_vrectbottom_particle) ||
// 	(u > d_vrectright_particle) ||
// 	(v < d_vrecty) ||
// 	(u < d_vrectx))
// {
// 	return;		// via LPop1AndDone: 1/z * 0x8000 is still on the x87 stack
// }

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

// Copy the 32-bit pt_color field to DP_Color by way of the FPU (loaded and
// stored as a single-precision value); only its low byte is used further
// down.  NOTE(review): presumably pt_color holds an integer palette index
// -- confirm against the particle structure.
	flds	pt_color(%edi)	// color | 1/z * 0x8000
	fstps	C(DP_Color)		// 1/z * 0x8000

	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx		// %ebx = d_viewbuffer + u
	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel:
												//  %edi = d_scantable[v]

	imull	C(d_zrowbytes),%edx		// point to the z pixel:
									//  %edx = v * d_zrowbytes

	leal	(%edx,%eax,2),%edx		//  ... + u * 2 (z entries are 16 bits)
	movl	C(d_pzbuffer),%eax

	fistpl	C(izi)			// izi = (int)(1/z * 0x8000); x87 stack is now
							//  back to its depth at entry

	addl	%ebx,%edi		// %edi = d_viewbuffer + d_scantable[v] + u
	addl	%eax,%edx		// %edx = d_pzbuffer + v * d_zrowbytes + u * 2

// pix = izi >> d_pix_shift;

	movl	C(izi),%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	C(izi),%ebp		// %ebp = izi; the drawing code uses %bp

// if (pix < d_pix_min)
// 		pix = d_pix_min;
// else if (pix > d_pix_max)
//  	pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

	movb	C(DP_Color),%ch		// %ch = low byte of the colour

// The table-driven fast paths are only taken when d_y_aspect_shift is zero
// and pix <= 4; everything else goes through the generic loop at LDefault.
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// Indirect jump through entry (pix - 1) of DP_EntryTable, which is defined
// outside this file.
// NOTE(review): assumes pix >= 1 here (i.e. d_pix_min >= 1); pix == 0 would
// read the dword just before the table -- confirm against the code that
// sets d_pix_min.
	jmp		*C(DP_EntryTable)-4(,%eax,4)

// 1x1 particle.
// Expects %edx -> z-buffer entry, %edi -> frame-buffer pixel, %bp = depth
// value to store and %ch = colour byte.
.globl	C(DP_1x1)
C(DP_1x1):
	cmpw	%bp,(%edx)		// if (!(*pz <= izi))	(signed 16-bit compare)
	jnle	LDone			//		nothing to draw
	movw	%bp,(%edx)		// *pz = izi
	movb	%ch,(%edi)		// *pdest = colour
	jmp		LDone

// 2x2 particle.
// Expects %edx -> z-buffer entry, %edi -> frame-buffer pixel, %bp = depth
// value to store and %ch = colour byte.  Each of the four pixels is tested
// on its own: it is skipped when the stored 16-bit depth is greater
// (signed) than %bp, otherwise depth and colour are both written.
.globl	C(DP_2x2)
C(DP_2x2):
	pushl	%esi					// %esi is borrowed for the z-buffer row step
	movl	C(d_zrowbytes),%esi		// offset from one z-buffer row to the next
	movl	C(screenwidth),%ebx		// offset from one pixel row to the next

// row 0
	cmpw	%bp,(%edx)
	jnle	1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jnle	1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jnle	1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jnle	1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:

	popl	%esi					// balance the push above before leaving
	jmp		LDone

// 3x3 particle.
// Expects %edx -> z-buffer entry, %edi -> frame-buffer pixel, %bp = depth
// value to store and %ch = colour byte.  Each of the nine pixels is tested
// on its own: it is skipped when the stored 16-bit depth is greater
// (signed) than %bp, otherwise depth and colour are both written.
.globl	C(DP_3x3)
C(DP_3x3):
	pushl	%esi					// %esi is borrowed for the z-buffer row step
	movl	C(d_zrowbytes),%esi		// offset from one z-buffer row to the next
	movl	C(screenwidth),%ebx		// offset from one pixel row to the next

// row 0
	cmpw	%bp,(%edx)
	jnle	1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jnle	1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jnle	1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jnle	1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jnle	1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jnle	1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:

// row 2 (row steps scaled by 2)
	cmpw	%bp,(%edx,%esi,2)
	jnle	1f
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
1:
	cmpw	%bp,2(%edx,%esi,2)
	jnle	1f
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
1:
	cmpw	%bp,4(%edx,%esi,2)
	jnle	1f
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
1:

	popl	%esi					// balance the push above before leaving
	jmp		LDone


// 4x4 particle.
// Expects %edx -> z-buffer entry, %edi -> frame-buffer pixel, %bp = depth
// value to store and %ch = colour byte.  Each of the sixteen pixels is
// tested on its own: it is skipped when the stored 16-bit depth is greater
// (signed) than %bp, otherwise depth and colour are both written.
// Rows 0-1 are addressed from the incoming pointers; both pointers are then
// moved down two rows and the same addressing is reused for rows 2-3.
.globl	C(DP_4x4)
C(DP_4x4):
	pushl	%esi					// %esi is borrowed for the z-buffer row step
	movl	C(d_zrowbytes),%esi		// offset from one z-buffer row to the next
	movl	C(screenwidth),%ebx		// offset from one pixel row to the next

// row 0
	cmpw	%bp,(%edx)
	jnle	1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jnle	1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jnle	1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:
	cmpw	%bp,6(%edx)
	jnle	1f
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
1:

// row 1
	cmpw	%bp,(%edx,%esi)
	jnle	1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jnle	1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jnle	1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:
	cmpw	%bp,6(%edx,%esi)
	jnle	1f
	movw	%bp,6(%edx,%esi)
	movb	%ch,3(%edi,%ebx)
1:

// step both pointers down two rows
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jnle	1f
	movw	%bp,(%edx)
	movb	%ch,(%edi)
1:
	cmpw	%bp,2(%edx)
	jnle	1f
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
1:
	cmpw	%bp,4(%edx)
	jnle	1f
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
1:
	cmpw	%bp,6(%edx)
	jnle	1f
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
1:

// row 3
	cmpw	%bp,(%edx,%esi)
	jnle	1f
	movw	%bp,(%edx,%esi)
	movb	%ch,(%edi,%ebx)
1:
	cmpw	%bp,2(%edx,%esi)
	jnle	1f
	movw	%bp,2(%edx,%esi)
	movb	%ch,1(%edi,%ebx)
1:
	cmpw	%bp,4(%edx,%esi)
	jnle	1f
	movw	%bp,4(%edx,%esi)
	movb	%ch,2(%edi,%ebx)
1:
	cmpw	%bp,6(%edx,%esi)
	jnle	1f
	movw	%bp,6(%edx,%esi)
	movb	%ch,3(%edi,%ebx)
1:

	popl	%esi					// balance the push above before leaving
	jmp		LDone

// Generic path for a particle of any size.
// Expects %eax = pix, %edx -> z-buffer entry, %edi -> frame-buffer pixel,
// %bp = depth value to store and %ch = colour byte.  Falls through into
// LDone when the last row has been processed.
LDefault:

// count = pix << d_y_aspect_shift;		(count lives in %ebx)

	movl	%eax,C(DP_Pix)			// keep pix so every row can reload it
	movl	%eax,%ebx
	movb	C(d_y_aspect_shift),%cl	// only %cl is written; %ch keeps the colour
	shll	%cl,%ebx

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
// 	for (i=0 ; i<pix ; i++)
// 	{
// 		if (pz[i] <= izi)
// 		{
// 			pz[i] = izi;
// 			pdest[i] = color;
// 		}
// 	}
// }
//
// The column loop below counts %eax down from pix to 1 and addresses
// element %eax - 1, so each row is visited from its last column to its
// first.

1:									// ---- next row ----
	movl	C(DP_Pix),%eax

2:									// ---- next column ----
	cmpw	%bp,-2(%edx,%eax,2)
	jnle	3f						// stored depth > izi: leave pixel alone
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax)
3:
	decl	%eax					// one column fewer to do
	jnz		2b

	addl	C(d_zrowbytes),%edx		// advance both pointers by one row
	addl	C(screenwidth),%edi

	decl	%ebx					// one row fewer to do
	jnz		1b

// Common exit: undo the three pushes of the prologue (in reverse order)
// and return to the caller.
LDone:
	popl	%ebx
	popl	%edi
	popl	%ebp
	ret

// Early exits that still have values on the x87 stack.
// LPop6AndDone discards five entries and then falls into LPop1AndDone,
// which discards one more -- six in total -- before taking the common exit.
LPop6AndDone:
	.rept	5
	fstp	%st(0)
	.endr
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// USE_INTEL_ASM

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`/*`
			`d_parta.S`

			`x86 assembly-language 8-bpp particle-drawing code.`

			`Copyright (C) 1996-1997 Id Software, Inc.`

			`This program is free software; you can redistribute it and/or`
			`modify it under the terms of the GNU General Public License`
			`as published by the Free Software Foundation; either version 2`
			`of the License, or (at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.`

			`See the GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program; if not, write to:`

			`Free Software Foundation, Inc.`
			`59 Temple Place - Suite 330`
			`Boston, MA 02111-1307, USA`

			$Id$
			`*/`

			`#ifdef HAVE_CONFIG_H`
			`# include <config.h>`
			`#endif`
include/QF header cleanup. Including config.h (or any file from include) is a no-no because headers in include/QF get installed but those in include do not, so doing so would break quakeforge-dev. 2001-05-31 05:33:13 +00:00			`#include "asm_i386.h"`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`#include "quakeasm.h"`
			`#include "d_ifacea.h"`
			`#include "asm_draw.h"`

re-enable asm for static builds 2001-06-11 20:59:48 +00:00			`#ifdef PIC`
disable asm in the software renderer for now. This will allow me to fix the files one at a time and allow others to use software independent of asm usage (even if a little slowly (~14% slower on my dual c450)). 2001-05-25 14:57:31 +00:00			`#undef USE_INTEL_ASM //XXX asm pic hack`
re-enable asm for static builds 2001-06-11 20:59:48 +00:00			`#endif`
disable asm in the software renderer for now. This will allow me to fix the files one at a time and allow others to use software independent of asm usage (even if a little slowly (~14% slower on my dual c450)). 2001-05-25 14:57:31 +00:00
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`#ifdef USE_INTEL_ASM`

			`//----------------------------------------------------------------------`
			`// 8-bpp particle drawing code.`
			`//----------------------------------------------------------------------`

			`//FIXME: comments, full optimization`

			`//----------------------------------------------------------------------`
			`// 8-bpp particle queueing code.`
			`//----------------------------------------------------------------------`

			`.text`

			`#define P 12+4`

			`.align 4`
			`.globl C(D_DrawParticle)`
			`C(D_DrawParticle):`
			`pushl %ebp // preserve caller's stack frame`
			`pushl %edi // preserve register variables`
			`pushl %ebx`

			`movl P(%esp),%edi`

			`// FIXME: better FP overlap in general here`

			`// transform point`
			`// VectorSubtract (p->org, r_origin, local);`
			`flds C(r_origin)`
			`fsubrs pt_org(%edi)`
			`flds pt_org+4(%edi)`
			`fsubs C(r_origin)+4`
			`flds pt_org+8(%edi)`
			`fsubs C(r_origin)+8`
			`fxch %st(2) // local[0] \| local[1] \| local[2]`

			`// transformed[2] = DotProduct(local, r_ppn);`
			`flds C(r_ppn) // r_ppn[0] \| local[0] \| local[1] \| local[2]`
			`fmul %st(1),%st(0) // dot0 \| local[0] \| local[1] \| local[2]`
			`flds C(r_ppn)+4 // r_ppn[1] \| dot0 \| local[0] \| local[1] \| local[2]`
			`fmul %st(3),%st(0) // dot1 \| dot0 \| local[0] \| local[1] \| local[2]`
			`flds C(r_ppn)+8 // r_ppn[2] \| dot1 \| dot0 \| local[0] \|`
			`// local[1] \| local[2]`
			`fmul %st(5),%st(0) // dot2 \| dot1 \| dot0 \| local[0] \| local[1] \| local[2]`
			`fxch %st(2) // dot0 \| dot1 \| dot2 \| local[0] \| local[1] \| local[2]`
			`faddp %st(0),%st(1) // dot0 + dot1 \| dot2 \| local[0] \| local[1] \|`
			`// local[2]`
			`faddp %st(0),%st(1) // z \| local[0] \| local[1] \| local[2]`
			`fld %st(0) // z \| z \| local[0] \| local[1] \|`
			`// local[2]`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fdivrs C(float_1) // 1/z \| z \| local[0] \| local[1] \| local[2]`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`fxch %st(1) // z \| 1/z \| local[0] \| local[1] \| local[2]`

			`// if (transformed[2] < PARTICLE_Z_CLIP)`
			`// return;`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fcomps C(float_particle_z_clip) // 1/z \| local[0] \| local[1] \| local[2]`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`fxch %st(3) // local[2] \| local[0] \| local[1] \| 1/z`

			`flds C(r_pup) // r_pup[0] \| local[2] \| local[0] \| local[1] \| 1/z`
			`fmul %st(2),%st(0) // dot0 \| local[2] \| local[0] \| local[1] \| 1/z`
			`flds C(r_pup)+4 // r_pup[1] \| dot0 \| local[2] \| local[0] \|`
			`// local[1] \| 1/z`

			`fnstsw %ax`
			`testb $1,%ah`
			`jnz LPop6AndDone`

			`// transformed[1] = DotProduct(local, r_pup);`
			`fmul %st(4),%st(0) // dot1 \| dot0 \| local[2] \| local[0] \| local[1] \| 1/z`
			`flds C(r_pup)+8 // r_pup[2] \| dot1 \| dot0 \| local[2] \|`
			`// local[0] \| local[1] \| 1/z`
			`fmul %st(3),%st(0) // dot2 \| dot1 \| dot0 \| local[2] \| local[0] \|`
			`// local[1] \| 1/z`
			`fxch %st(2) // dot0 \| dot1 \| dot2 \| local[2] \| local[0] \|`
			`// local[1] \| 1/z`
			`faddp %st(0),%st(1) // dot0 + dot1 \| dot2 \| local[2] \| local[0] \|`
			`// local[1] \| 1/z`
			`faddp %st(0),%st(1) // y \| local[2] \| local[0] \| local[1] \| 1/z`
			`fxch %st(3) // local[1] \| local[2] \| local[0] \| y \| 1/z`

			`// transformed[0] = DotProduct(local, r_pright);`
			`fmuls C(r_pright)+4 // dot1 \| local[2] \| local[0] \| y \| 1/z`
			`fxch %st(2) // local[0] \| local[2] \| dot1 \| y \| 1/z`
			`fmuls C(r_pright) // dot0 \| local[2] \| dot1 \| y \| 1/z`
			`fxch %st(1) // local[2] \| dot0 \| dot1 \| y \| 1/z`
			`fmuls C(r_pright)+8 // dot2 \| dot0 \| dot1 \| y \| 1/z`
			`fxch %st(2) // dot1 \| dot0 \| dot2 \| y \| 1/z`
			`faddp %st(0),%st(1) // dot1 + dot0 \| dot2 \| y \| 1/z`

			`faddp %st(0),%st(1) // x \| y \| 1/z`
			`fxch %st(1) // y \| x \| 1/z`

			`// project the point`
			`fmul %st(2),%st(0) // y/z \| x \| 1/z`
			`fxch %st(1) // x \| y/z \| 1/z`
			`fmul %st(2),%st(0) // x/z \| y/z \| 1/z`
			`fxch %st(1) // y/z \| x/z \| 1/z`
			`fsubrs C(ycenter) // v \| x/z \| 1/z`
			`fxch %st(1) // x/z \| v \| 1/z`
			`fadds C(xcenter) // u \| v \| 1/z`
			`// FIXME: preadjust xcenter and ycenter`
			`fxch %st(1) // v \| u \| 1/z`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fadds C(float_point5) // v \| u \| 1/z`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`fxch %st(1) // u \| v \| 1/z`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fadds C(float_point5) // u \| v \| 1/z`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`fxch %st(2) // 1/z \| v \| u`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fmuls C(DP_32768) // 1/z * 0x8000 \| v \| u`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`fxch %st(2) // u \| v \| 1/z * 0x8000`

			`// FIXME: use Terje's fp->int trick here?`
			`// FIXME: check we're getting proper rounding here`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fistpl C(DP_u) // v \| 1/z * 0x8000`
			`fistpl C(DP_v) // 1/z * 0x8000`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movl C(DP_u),%eax`
			`movl C(DP_v),%edx`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`// if ((v > d_vrectbottom_particle) \|\|`
			`// (u > d_vrectright_particle) \|\|`
			`// (v < d_vrecty) \|\|`
			`// (u < d_vrectx))`
			`// {`
			`// continue;`
			`// }`

			`movl C(d_vrectbottom_particle),%ebx`
			`movl C(d_vrectright_particle),%ecx`
			`cmpl %ebx,%edx`
			`jg LPop1AndDone`
			`cmpl %ecx,%eax`
			`jg LPop1AndDone`
			`movl C(d_vrecty),%ebx`
			`movl C(d_vrectx),%ecx`
			`cmpl %ebx,%edx`
			`jl LPop1AndDone`

			`cmpl %ecx,%eax`
			`jl LPop1AndDone`

			`flds pt_color(%edi) // color \| 1/z * 0x8000`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fstps C(DP_Color) // 1/z * 0x8000`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`movl C(d_viewbuffer),%ebx`

			`addl %eax,%ebx`
			`movl C(d_scantable)(,%edx,4),%edi // point to the pixel`

			`imull C(d_zrowbytes),%edx // point to the z pixel`

			`leal (%edx,%eax,2),%edx`
			`movl C(d_pzbuffer),%eax`

Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`fistpl C(izi)`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`addl %ebx,%edi`
			`addl %eax,%edx`

			`// pix = izi >> d_pix_shift;`

Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movl C(izi),%eax`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`movl C(d_pix_shift),%ecx`
			`shrl %cl,%eax`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movl C(izi),%ebp`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`// if (pix < d_pix_min)`
			`// pix = d_pix_min;`
			`// else if (pix > d_pix_max)`
			`// pix = d_pix_max;`

			`movl C(d_pix_min),%ebx`
			`movl C(d_pix_max),%ecx`
			`cmpl %ebx,%eax`
			`jnl LTestPixMax`
			`movl %ebx,%eax`
			`jmp LTestDone`

			`LTestPixMax:`
			`cmpl %ecx,%eax`
			`jng LTestDone`
			`movl %ecx,%eax`
			`LTestDone:`

Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movb C(DP_Color),%ch`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`movl C(d_y_aspect_shift),%ebx`
			`testl %ebx,%ebx`
			`jnz LDefault`

			`cmpl $4,%eax`
			`ja LDefault`

Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`jmp *C(DP_EntryTable)-4(,%eax,4)`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`// 1x1`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`.globl C(DP_1x1)`
			`C(DP_1x1):`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`cmpw %bp,(%edx) // just one pixel to do`
			`jg LDone`
			`movw %bp,(%edx)`
			`movb %ch,(%edi)`
			`jmp LDone`

			`// 2x2`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`.globl C(DP_2x2)`
			`C(DP_2x2):`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`pushl %esi`
			`movl C(screenwidth),%ebx`
			`movl C(d_zrowbytes),%esi`

			`cmpw %bp,(%edx)`
			`jg L2x2_1`
			`movw %bp,(%edx)`
			`movb %ch,(%edi)`
			`L2x2_1:`
			`cmpw %bp,2(%edx)`
			`jg L2x2_2`
			`movw %bp,2(%edx)`
			`movb %ch,1(%edi)`
			`L2x2_2:`
			`cmpw %bp,(%edx,%esi,1)`
			`jg L2x2_3`
			`movw %bp,(%edx,%esi,1)`
			`movb %ch,(%edi,%ebx,1)`
			`L2x2_3:`
			`cmpw %bp,2(%edx,%esi,1)`
			`jg L2x2_4`
			`movw %bp,2(%edx,%esi,1)`
			`movb %ch,1(%edi,%ebx,1)`
			`L2x2_4:`

			`popl %esi`
			`jmp LDone`

			`// 3x3`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`.globl C(DP_3x3)`
			`C(DP_3x3):`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`pushl %esi`
			`movl C(screenwidth),%ebx`
			`movl C(d_zrowbytes),%esi`

			`cmpw %bp,(%edx)`
			`jg L3x3_1`
			`movw %bp,(%edx)`
			`movb %ch,(%edi)`
			`L3x3_1:`
			`cmpw %bp,2(%edx)`
			`jg L3x3_2`
			`movw %bp,2(%edx)`
			`movb %ch,1(%edi)`
			`L3x3_2:`
			`cmpw %bp,4(%edx)`
			`jg L3x3_3`
			`movw %bp,4(%edx)`
			`movb %ch,2(%edi)`
			`L3x3_3:`

			`cmpw %bp,(%edx,%esi,1)`
			`jg L3x3_4`
			`movw %bp,(%edx,%esi,1)`
			`movb %ch,(%edi,%ebx,1)`
			`L3x3_4:`
			`cmpw %bp,2(%edx,%esi,1)`
			`jg L3x3_5`
			`movw %bp,2(%edx,%esi,1)`
			`movb %ch,1(%edi,%ebx,1)`
			`L3x3_5:`
			`cmpw %bp,4(%edx,%esi,1)`
			`jg L3x3_6`
			`movw %bp,4(%edx,%esi,1)`
			`movb %ch,2(%edi,%ebx,1)`
			`L3x3_6:`

			`cmpw %bp,(%edx,%esi,2)`
			`jg L3x3_7`
			`movw %bp,(%edx,%esi,2)`
			`movb %ch,(%edi,%ebx,2)`
			`L3x3_7:`
			`cmpw %bp,2(%edx,%esi,2)`
			`jg L3x3_8`
			`movw %bp,2(%edx,%esi,2)`
			`movb %ch,1(%edi,%ebx,2)`
			`L3x3_8:`
			`cmpw %bp,4(%edx,%esi,2)`
			`jg L3x3_9`
			`movw %bp,4(%edx,%esi,2)`
			`movb %ch,2(%edi,%ebx,2)`
			`L3x3_9:`

			`popl %esi`
			`jmp LDone`


			`// 4x4`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`.globl C(DP_4x4)`
			`C(DP_4x4):`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`pushl %esi`
			`movl C(screenwidth),%ebx`
			`movl C(d_zrowbytes),%esi`

			`cmpw %bp,(%edx)`
			`jg L4x4_1`
			`movw %bp,(%edx)`
			`movb %ch,(%edi)`
			`L4x4_1:`
			`cmpw %bp,2(%edx)`
			`jg L4x4_2`
			`movw %bp,2(%edx)`
			`movb %ch,1(%edi)`
			`L4x4_2:`
			`cmpw %bp,4(%edx)`
			`jg L4x4_3`
			`movw %bp,4(%edx)`
			`movb %ch,2(%edi)`
			`L4x4_3:`
			`cmpw %bp,6(%edx)`
			`jg L4x4_4`
			`movw %bp,6(%edx)`
			`movb %ch,3(%edi)`
			`L4x4_4:`

			`cmpw %bp,(%edx,%esi,1)`
			`jg L4x4_5`
			`movw %bp,(%edx,%esi,1)`
			`movb %ch,(%edi,%ebx,1)`
			`L4x4_5:`
			`cmpw %bp,2(%edx,%esi,1)`
			`jg L4x4_6`
			`movw %bp,2(%edx,%esi,1)`
			`movb %ch,1(%edi,%ebx,1)`
			`L4x4_6:`
			`cmpw %bp,4(%edx,%esi,1)`
			`jg L4x4_7`
			`movw %bp,4(%edx,%esi,1)`
			`movb %ch,2(%edi,%ebx,1)`
			`L4x4_7:`
			`cmpw %bp,6(%edx,%esi,1)`
			`jg L4x4_8`
			`movw %bp,6(%edx,%esi,1)`
			`movb %ch,3(%edi,%ebx,1)`
			`L4x4_8:`

			`leal (%edx,%esi,2),%edx`
			`leal (%edi,%ebx,2),%edi`

			`cmpw %bp,(%edx)`
			`jg L4x4_9`
			`movw %bp,(%edx)`
			`movb %ch,(%edi)`
			`L4x4_9:`
			`cmpw %bp,2(%edx)`
			`jg L4x4_10`
			`movw %bp,2(%edx)`
			`movb %ch,1(%edi)`
			`L4x4_10:`
			`cmpw %bp,4(%edx)`
			`jg L4x4_11`
			`movw %bp,4(%edx)`
			`movb %ch,2(%edi)`
			`L4x4_11:`
			`cmpw %bp,6(%edx)`
			`jg L4x4_12`
			`movw %bp,6(%edx)`
			`movb %ch,3(%edi)`
			`L4x4_12:`

			`cmpw %bp,(%edx,%esi,1)`
			`jg L4x4_13`
			`movw %bp,(%edx,%esi,1)`
			`movb %ch,(%edi,%ebx,1)`
			`L4x4_13:`
			`cmpw %bp,2(%edx,%esi,1)`
			`jg L4x4_14`
			`movw %bp,2(%edx,%esi,1)`
			`movb %ch,1(%edi,%ebx,1)`
			`L4x4_14:`
			`cmpw %bp,4(%edx,%esi,1)`
			`jg L4x4_15`
			`movw %bp,4(%edx,%esi,1)`
			`movb %ch,2(%edi,%ebx,1)`
			`L4x4_15:`
			`cmpw %bp,6(%edx,%esi,1)`
			`jg L4x4_16`
			`movw %bp,6(%edx,%esi,1)`
			`movb %ch,3(%edi,%ebx,1)`
			`L4x4_16:`

			`popl %esi`
			`jmp LDone`

			`// default case, handling any size particle`
			`LDefault:`

			`// count = pix << d_y_aspect_shift;`

			`movl %eax,%ebx`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movl %eax,C(DP_Pix)`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00			`movb C(d_y_aspect_shift),%cl`
			`shll %cl,%ebx`

			`// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)`
			`// {`
			`// for (i=0 ; i<pix ; i++)`
			`// {`
			`// if (pz[i] <= izi)`
			`// {`
			`// pz[i] = izi;`
			`// pdest[i] = color;`
			`// }`
			`// }`
			`// }`

			`LGenRowLoop:`
Fix asm ilnking in mingw. It seems that recent binutils/libtool doesn't like exporting symbols that don't begin with a _. 2011-09-10 08:49:14 +00:00			`movl C(DP_Pix),%eax`
initial checkin of most recent newtree and nuq(?) source 2001-02-19 21:15:25 +00:00
			`LGenColLoop:`
			`cmpw %bp,-2(%edx,%eax,2)`
			`jg LGSkip`
			`movw %bp,-2(%edx,%eax,2)`
			`movb %ch,-1(%edi,%eax,1)`
			`LGSkip:`
			`decl %eax // --pix`
			`jnz LGenColLoop`

			`addl C(d_zrowbytes),%edx`
			`addl C(screenwidth),%edi`

			`decl %ebx // --count`
			`jnz LGenRowLoop`

			`LDone:`
			`popl %ebx // restore register variables`
			`popl %edi`
			`popl %ebp // restore the caller's stack frame`
			`ret`

			`LPop6AndDone:`
			`fstp %st(0)`
			`fstp %st(0)`
			`fstp %st(0)`
			`fstp %st(0)`
			`fstp %st(0)`
			`LPop1AndDone:`
			`fstp %st(0)`
			`jmp LDone`

			`#endif // USE_INTEL_ASM`
Add GNU-stack notes to assembly files. Prevents GCC from assuming an executable stack is needed. Signed-off-by: Ionen Wolkens <ionen@gentoo.org> 2021-06-13 14:12:03 +00:00
			`#if defined(__linux__) && defined(__ELF__)`
			`.section .note.GNU-stack,"",%progbits`
			`#endif`