quakeforge/libs/video/renderer/sw/sw_rdrawa.S
Bill Currie bce7d5b832 [renderer] Clean up use of vup/vright/vpn
This moves the common camera setup code out of the individual drivers,
and completely removes vup/vright/vpn from the non-software renderers.
This has highlighted the craziness around AngleVectors with it putting
+X forward, -Y right and +Z up. The main issue with this is it requires
a 90 degree pre-rotation about the Z axis to get the camera pointing in
the right direction, and that's for the native sw renderer (vulkan needs
a 90 degree pre-rotation about X, and gl and glsl need to invert an
axis, too), though at least it's just a matrix swizzle and vector
negation. However, it does mean the camera matrices can't be used
directly.

Also rename vpn to vfwd (still abbreviated, but fwd is much clearer in
meaning (to me, at least) than pn (plane normal, I guess, but which
way?)).
2022-03-14 09:45:27 +09:00

852 lines
19 KiB
ArmAsm

/*
r_drawa.S
x86 assembly-language edge clipping and emission code
Copyright (C) 1996-1997 Id Software, Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to:
Free Software Foundation, Inc.
59 Temple Place - Suite 330
Boston, MA 02111-1307, USA
$Id$
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "asm_i386.h"
#include "quakeasm.h"
#include "asm_draw.h"
#include "d_ifacea.h"
#ifdef PIC
#undef USE_INTEL_ASM //XXX asm pic hack
#endif
#ifdef USE_INTEL_ASM
// !!! if these are changed, they must be changed in r_draw.c too !!!
#define FULLY_CLIPPED_CACHED 0x80000000
#define FRAMECOUNT_MASK 0x7FFFFFFF
.data
Ld0: .single 0.0
Ld1: .single 0.0
Lstack: .long 0
Lfp_near_clip: .single NEAR_CLIP
Lceilv0: .long 0
Lv: .long 0
Lu0: .long 0
Lv0: .long 0
Lzi0: .long 0
.text
//----------------------------------------------------------------------
// edge clipping code
//----------------------------------------------------------------------
#define pv0 4+12
#define pv1 8+12
#define clip 12+12
.align 4
.globl C(R_ClipEdge)
C(R_ClipEdge):
pushl %esi // preserve register variables
pushl %edi
pushl %ebx
movl %esp,Lstack // for clearing the stack later
// float d0, d1, f;
// mvertex_t clipvert;
movl clip(%esp),%ebx
movl pv0(%esp),%esi
movl pv1(%esp),%edx
// if (clip)
// {
testl %ebx,%ebx
jz Lemit
// do
// {
Lcliploop:
// d0 = DotProduct (pv0->position, clip->normal) - clip->dist;
// d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
flds mv_position+0(%esi)
fmuls cp_normal+0(%ebx)
flds mv_position+4(%esi)
fmuls cp_normal+4(%ebx)
flds mv_position+8(%esi)
fmuls cp_normal+8(%ebx)
fxch %st(1)
faddp %st(0),%st(2) // d0mul2 | d0add0
flds mv_position+0(%edx)
fmuls cp_normal+0(%ebx)
flds mv_position+4(%edx)
fmuls cp_normal+4(%ebx)
flds mv_position+8(%edx)
fmuls cp_normal+8(%ebx)
fxch %st(1)
faddp %st(0),%st(2) // d1mul2 | d1add0 | d0mul2 | d0add0
fxch %st(3) // d0add0 | d1add0 | d0mul2 | d1mul2
faddp %st(0),%st(2) // d1add0 | dot0 | d1mul2
faddp %st(0),%st(2) // dot0 | dot1
fsubs cp_dist(%ebx) // d0 | dot1
fxch %st(1) // dot1 | d0
fsubs cp_dist(%ebx) // d1 | d0
fxch %st(1)
fstps Ld0
fstps Ld1
// if (d0 >= 0)
// {
movl Ld0,%eax
movl Ld1,%ecx
orl %eax,%ecx
js Lp2
// both points are unclipped
Lcontinue:
//
// R_ClipEdge (&clipvert, pv1, clip->next);
// return;
// }
// } while ((clip = clip->next) != NULL);
movl cp_next(%ebx),%ebx
testl %ebx,%ebx
jnz Lcliploop
// }
//// add the edge
// R_EmitEdge (pv0, pv1);
Lemit:
//
// set integer rounding to ceil mode, set to single precision
//
// FIXME: do away with by manually extracting integers from floats?
// FIXME: set less often
fldcw C(r_ceil_cw)
// edge_t *edge, *pcheck;
// int u_check;
// float u, u_step;
// vec3_t local, transformed;
// float *world;
// int v, v2, ceilv0;
// float scale, lzi0, u0, v0;
// int side;
// if (r_lastvertvalid)
// {
cmpl $0,C(r_lastvertvalid)
jz LCalcFirst
// u0 = r_u1;
// v0 = r_v1;
// lzi0 = r_lzi1;
// ceilv0 = r_ceilv1;
movl C(r_lzi1),%eax
movl C(r_u1),%ecx
movl %eax,Lzi0
movl %ecx,Lu0
movl C(r_v1),%ecx
movl C(r_ceilv1),%eax
movl %ecx,Lv0
movl %eax,Lceilv0
jmp LCalcSecond
// }
LCalcFirst:
// else
// {
// world = &pv0->position[0];
call LTransformAndProject // v0 | lzi0 | u0
fsts Lv0
fxch %st(2) // u0 | lzi0 | v0
fstps Lu0 // lzi0 | v0
fstps Lzi0 // v0
// ceilv0 = (int)(v0 - 2000) + 2000; // ceil(v0);
fistpl Lceilv0
// }
LCalcSecond:
// world = &pv1->position[0];
movl %edx,%esi
call LTransformAndProject // v1 | lzi1 | u1
flds Lu0 // u0 | v1 | lzi1 | u1
fxch %st(3) // u1 | v1 | lzi1 | u0
flds Lzi0 // lzi0 | u1 | v1 | lzi1 | u0
fxch %st(3) // lzi1 | u1 | v1 | lzi0 | u0
flds Lv0 // v0 | lzi1 | u1 | v1 | lzi0 | u0
fxch %st(3) // v1 | lzi1 | u1 | v0 | lzi0 | u0
// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
fistl C(r_ceilv1)
fldcw C(r_single_cw) // put back normal floating-point state
fsts C(r_v1)
fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
// if (r_lzi1 > lzi0)
// lzi0 = r_lzi1;
fcom %st(1)
fnstsw %ax
testb $1,%ah
jz LP0
fstp %st(0)
fld %st(0)
LP0:
fxch %st(1) // lzi1 | lzi0 | u1 | v0 | v1 | u0
fstps C(r_lzi1) // lzi0 | u1 | v0 | v1 | u0
fxch %st(1)
fsts C(r_u1)
fxch %st(1)
// if (lzi0 > r_nearzi) // for mipmap finding
// r_nearzi = lzi0;
fcoms C(r_nearzi)
fnstsw %ax
testb $0x45,%ah
jnz LP1
fsts C(r_nearzi)
LP1:
// // for right edges, all we want is the effect on 1/z
// if (r_nearzionly)
// return;
movl C(r_nearzionly),%eax
testl %eax,%eax
jz LP2
LPop5AndDone:
movl C(cacheoffset),%eax
movl C(r_framecount),%edx
cmpl $0x7FFFFFFF,%eax
jz LDoPop
andl $(FRAMECOUNT_MASK),%edx
orl $(FULLY_CLIPPED_CACHED),%edx
movl %edx,C(cacheoffset)
LDoPop:
fstp %st(0) // u1 | v0 | v1 | u0
fstp %st(0) // v0 | v1 | u0
fstp %st(0) // v1 | u0
fstp %st(0) // u0
fstp %st(0)
jmp Ldone
LP2:
// // create the edge
// if (ceilv0 == r_ceilv1)
// return; // horizontal edge
movl Lceilv0,%ebx
movl C(edge_p),%edi
movl C(r_ceilv1),%ecx
movl %edi,%edx
movl C(r_pedge),%esi
addl $(et_size),%edx
cmpl %ecx,%ebx
jz LPop5AndDone
movl C(r_pedge),%eax
movl %eax,et_owner(%edi)
// side = ceilv0 > r_ceilv1;
//
// edge->nearzi = lzi0;
fstps et_nearzi(%edi) // u1 | v0 | v1 | u0
// if (side == 1)
// {
jc LSide0
LSide1:
// // leading edge (go from p2 to p1)
// u_step = ((u0 - r_u1) / (v0 - r_v1));
fsubrp %st(0),%st(3) // v0 | v1 | u0-u1
fsub %st(1),%st(0) // v0-v1 | v1 | u0-u1
fdivrp %st(0),%st(2) // v1 | ustep
// r_emitted = 1;
movl $1,C(r_emitted)
// edge = edge_p++;
movl %edx,C(edge_p)
// pretouch next edge
movl (%edx),%eax
// v2 = ceilv0 - 1;
// v = r_ceilv1;
movl %ecx,%eax
leal -1(%ebx),%ecx
movl %eax,%ebx
// edge->surfs[0] = 0;
// edge->surfs[1] = surface_p - surfaces;
movl C(surface_p),%eax
movl C(surfaces),%esi
subl %edx,%edx
subl %esi,%eax
shrl $(SURF_T_SHIFT),%eax
movl %edx,et_surfs(%edi)
movl %eax,et_surfs+2(%edi)
subl %esi,%esi
// u = r_u1 + ((float)v - r_v1) * u_step;
movl %ebx,Lv
fildl Lv // v | v1 | ustep
fsubp %st(0),%st(1) // v-v1 | ustep
fmul %st(1),%st(0) // (v-v1)*ustep | ustep
fadds C(r_u1) // u | ustep
jmp LSideDone
// }
LSide0:
// else
// {
// // trailing edge (go from p1 to p2)
// u_step = ((r_u1 - u0) / (r_v1 - v0));
fsub %st(3),%st(0) // u1-u0 | v0 | v1 | u0
fxch %st(2) // v1 | v0 | u1-u0 | u0
fsub %st(1),%st(0) // v1-v0 | v0 | u1-u0 | u0
fdivrp %st(0),%st(2) // v0 | ustep | u0
// r_emitted = 1;
movl $1,C(r_emitted)
// edge = edge_p++;
movl %edx,C(edge_p)
// pretouch next edge
movl (%edx),%eax
// v = ceilv0;
// v2 = r_ceilv1 - 1;
decl %ecx
// edge->surfs[0] = surface_p - surfaces;
// edge->surfs[1] = 0;
movl C(surface_p),%eax
movl C(surfaces),%esi
subl %edx,%edx
subl %esi,%eax
shrl $(SURF_T_SHIFT),%eax
movl %edx,et_surfs+2(%edi)
movl %eax,et_surfs(%edi)
movl $1,%esi
// u = u0 + ((float)v - v0) * u_step;
movl %ebx,Lv
fildl Lv // v | v0 | ustep | u0
fsubp %st(0),%st(1) // v-v0 | ustep | u0
fmul %st(1),%st(0) // (v-v0)*ustep | ustep | u0
faddp %st(0),%st(2) // ustep | u
fxch %st(1) // u | ustep
// }
LSideDone:
// edge->u_step = u_step*0x100000;
// edge->u = u*0x100000 + 0xFFFFF;
fmuls C(fp_1m) // u*0x100000 | ustep
fxch %st(1) // ustep | u*0x100000
fmuls C(fp_1m) // ustep*0x100000 | u*0x100000
fxch %st(1) // u*0x100000 | ustep*0x100000
fadds C(fp_1m_minus_1) // u*0x100000 + 0xFFFFF | ustep*0x100000
fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
fistpl et_u(%edi)
// // we need to do this to avoid stepping off the edges if a very nearly
// // horizontal edge is less than epsilon above a scan, and numeric error
// // causes it to incorrectly extend to the scan, and the extension of the
// // line goes off the edge of the screen
// // FIXME: is this actually needed?
// if (edge->u < r_refdef.vrect_x_adj_shift20)
// edge->u = r_refdef.vrect_x_adj_shift20;
// if (edge->u > r_refdef.vrectright_adj_shift20)
// edge->u = r_refdef.vrectright_adj_shift20;
movl et_u(%edi),%eax
movl C(r_refdef)+rd_vrect_x_adj_shift20,%edx
cmpl %edx,%eax
jl LP4
movl C(r_refdef)+rd_vrectright_adj_shift20,%edx
cmpl %edx,%eax
jng LP5
LP4:
movl %edx,et_u(%edi)
movl %edx,%eax
LP5:
// // sort the edge in normally
// u_check = edge->u;
//
// if (edge->surfs[0])
// u_check++; // sort trailers after leaders
addl %esi,%eax
// if (!newedges[v] || newedges[v]->u >= u_check)
// {
movl C(newedges)(,%ebx,4),%esi
testl %esi,%esi
jz LDoFirst
cmpl %eax,et_u(%esi)
jl LNotFirst
LDoFirst:
// edge->next = newedges[v];
// newedges[v] = edge;
movl %esi,et_next(%edi)
movl %edi,C(newedges)(,%ebx,4)
jmp LSetRemove
// }
LNotFirst:
// else
// {
// pcheck = newedges[v];
//
// while (pcheck->next && pcheck->next->u < u_check)
// pcheck = pcheck->next;
LFindInsertLoop:
movl %esi,%edx
movl et_next(%esi),%esi
testl %esi,%esi
jz LInsertFound
cmpl %eax,et_u(%esi)
jl LFindInsertLoop
LInsertFound:
// edge->next = pcheck->next;
// pcheck->next = edge;
movl %esi,et_next(%edi)
movl %edi,et_next(%edx)
// }
LSetRemove:
// edge->nextremove = removeedges[v2];
// removeedges[v2] = edge;
movl C(removeedges)(,%ecx,4),%eax
movl %edi,C(removeedges)(,%ecx,4)
movl %eax,et_nextremove(%edi)
Ldone:
movl Lstack,%esp // clear temporary variables from stack
popl %ebx // restore register variables
popl %edi
popl %esi
ret
// at least one point is clipped
Lp2:
testl %eax,%eax
jns Lp1
// else
// {
// // point 0 is clipped
// if (d1 < 0)
// {
movl Ld1,%eax
testl %eax,%eax
jns Lp3
// // both points are clipped
// // we do cache fully clipped edges
// if (!leftclipped)
movl C(r_leftclipped),%eax
movl C(r_pedge),%ecx
testl %eax,%eax
jnz Ldone
// r_pedge->framecount = r_framecount;
movl C(r_framecount),%eax
andl $(FRAMECOUNT_MASK),%eax
orl $(FULLY_CLIPPED_CACHED),%eax
movl %eax,C(cacheoffset)
// return;
jmp Ldone
// }
Lp1:
// // point 0 is unclipped
// if (d1 >= 0)
// {
// // both points are unclipped
// continue;
// // only point 1 is clipped
// f = d0 / (d0 - d1);
flds Ld0
flds Ld1
fsubr %st(1),%st(0)
// // we don't cache partially clipped edges
movl $0x7FFFFFFF,C(cacheoffset)
fdivrp %st(0),%st(1)
subl $(mv_size),%esp // allocate space for clipvert
// clipvert.position[0] = pv0->position[0] +
// f * (pv1->position[0] - pv0->position[0]);
// clipvert.position[1] = pv0->position[1] +
// f * (pv1->position[1] - pv0->position[1]);
// clipvert.position[2] = pv0->position[2] +
// f * (pv1->position[2] - pv0->position[2]);
flds mv_position+8(%edx)
fsubs mv_position+8(%esi)
flds mv_position+4(%edx)
fsubs mv_position+4(%esi)
flds mv_position+0(%edx)
fsubs mv_position+0(%esi) // 0 | 1 | 2
// replace pv1 with the clip point
movl %esp,%edx
movl cp_leftedge(%ebx),%eax
testb %al,%al
fmul %st(3),%st(0)
fxch %st(1) // 1 | 0 | 2
fmul %st(3),%st(0)
fxch %st(2) // 2 | 0 | 1
fmulp %st(0),%st(3) // 0 | 1 | 2
fadds mv_position+0(%esi)
fxch %st(1) // 1 | 0 | 2
fadds mv_position+4(%esi)
fxch %st(2) // 2 | 0 | 1
fadds mv_position+8(%esi)
fxch %st(1) // 0 | 2 | 1
fstps mv_position+0(%esp) // 2 | 1
fstps mv_position+8(%esp) // 1
fstps mv_position+4(%esp)
// if (clip->leftedge)
// {
jz Ltestright
// r_leftclipped = true;
// r_leftexit = clipvert;
movl $1,C(r_leftclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_leftexit)+mv_position+8
jmp Lcontinue
// }
Ltestright:
// else if (clip->rightedge)
// {
testb %ah,%ah
jz Lcontinue
// r_rightclipped = true;
// r_rightexit = clipvert;
movl $1,C(r_rightclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_rightexit)+mv_position+8
// }
//
// R_ClipEdge (pv0, &clipvert, clip->next);
// return;
// }
jmp Lcontinue
// }
Lp3:
// // only point 0 is clipped
// r_lastvertvalid = false;
movl $0,C(r_lastvertvalid)
// f = d0 / (d0 - d1);
flds Ld0
flds Ld1
fsubr %st(1),%st(0)
// // we don't cache partially clipped edges
movl $0x7FFFFFFF,C(cacheoffset)
fdivrp %st(0),%st(1)
subl $(mv_size),%esp // allocate space for clipvert
// clipvert.position[0] = pv0->position[0] +
// f * (pv1->position[0] - pv0->position[0]);
// clipvert.position[1] = pv0->position[1] +
// f * (pv1->position[1] - pv0->position[1]);
// clipvert.position[2] = pv0->position[2] +
// f * (pv1->position[2] - pv0->position[2]);
flds mv_position+8(%edx)
fsubs mv_position+8(%esi)
flds mv_position+4(%edx)
fsubs mv_position+4(%esi)
flds mv_position+0(%edx)
fsubs mv_position+0(%esi) // 0 | 1 | 2
movl cp_leftedge(%ebx),%eax
testb %al,%al
fmul %st(3),%st(0)
fxch %st(1) // 1 | 0 | 2
fmul %st(3),%st(0)
fxch %st(2) // 2 | 0 | 1
fmulp %st(0),%st(3) // 0 | 1 | 2
fadds mv_position+0(%esi)
fxch %st(1) // 1 | 0 | 2
fadds mv_position+4(%esi)
fxch %st(2) // 2 | 0 | 1
fadds mv_position+8(%esi)
fxch %st(1) // 0 | 2 | 1
fstps mv_position+0(%esp) // 2 | 1
fstps mv_position+8(%esp) // 1
fstps mv_position+4(%esp)
// replace pv0 with the clip point
movl %esp,%esi
// if (clip->leftedge)
// {
jz Ltestright2
// r_leftclipped = true;
// r_leftenter = clipvert;
movl $1,C(r_leftclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_leftenter)+mv_position+8
jmp Lcontinue
// }
Ltestright2:
// else if (clip->rightedge)
// {
testb %ah,%ah
jz Lcontinue
// r_rightclipped = true;
// r_rightenter = clipvert;
movl $1,C(r_rightclipped)
movl mv_position+0(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+0
movl mv_position+4(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+4
movl mv_position+8(%esp),%eax
movl %eax,C(r_rightenter)+mv_position+8
// }
jmp Lcontinue
// %esi = vec3_t point to transform and project
// %edx preserved
LTransformAndProject:
// // transform and project
// VectorSubtract (world, modelorg, local);
flds mv_position+0(%esi)
fsubs C(modelorg)+0
flds mv_position+4(%esi)
fsubs C(modelorg)+4
flds mv_position+8(%esi)
fsubs C(modelorg)+8
fxch %st(2) // local[0] | local[1] | local[2]
// TransformVector (local, transformed);
//
// if (transformed[2] < NEAR_CLIP)
// transformed[2] = NEAR_CLIP;
//
// lzi0 = 1.0 / transformed[2];
fld %st(0) // local[0] | local[0] | local[1] | local[2]
fmuls C(vfwd)+0 // zm0 | local[0] | local[1] | local[2]
fld %st(1) // local[0] | zm0 | local[0] | local[1] |
// local[2]
fmuls C(vright)+0 // xm0 | zm0 | local[0] | local[1] | local[2]
fxch %st(2) // local[0] | zm0 | xm0 | local[1] | local[2]
fmuls C(vup)+0 // ym0 | zm0 | xm0 | local[1] | local[2]
fld %st(3) // local[1] | ym0 | zm0 | xm0 | local[1] |
// local[2]
fmuls C(vfwd)+4 // zm1 | ym0 | zm0 | xm0 | local[1] |
// local[2]
fld %st(4) // local[1] | zm1 | ym0 | zm0 | xm0 |
// local[1] | local[2]
fmuls C(vright)+4 // xm1 | zm1 | ym0 | zm0 | xm0 |
// local[1] | local[2]
fxch %st(5) // local[1] | zm1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
fmuls C(vup)+4 // ym1 | zm1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
fxch %st(1) // zm1 | ym1 | ym0 | zm0 | xm0 |
// xm1 | local[2]
faddp %st(0),%st(3) // ym1 | ym0 | zm2 | xm0 | xm1 | local[2]
fxch %st(3) // xm0 | ym0 | zm2 | ym1 | xm1 | local[2]
faddp %st(0),%st(4) // ym0 | zm2 | ym1 | xm2 | local[2]
faddp %st(0),%st(2) // zm2 | ym2 | xm2 | local[2]
fld %st(3) // local[2] | zm2 | ym2 | xm2 | local[2]
fmuls C(vfwd)+8 // zm3 | zm2 | ym2 | xm2 | local[2]
fld %st(4) // local[2] | zm3 | zm2 | ym2 | xm2 | local[2]
fmuls C(vright)+8 // xm3 | zm3 | zm2 | ym2 | xm2 | local[2]
fxch %st(5) // local[2] | zm3 | zm2 | ym2 | xm2 | xm3
fmuls C(vup)+8 // ym3 | zm3 | zm2 | ym2 | xm2 | xm3
fxch %st(1) // zm3 | ym3 | zm2 | ym2 | xm2 | xm3
faddp %st(0),%st(2) // ym3 | zm4 | ym2 | xm2 | xm3
fxch %st(4) // xm3 | zm4 | ym2 | xm2 | ym3
faddp %st(0),%st(3) // zm4 | ym2 | xm4 | ym3
fxch %st(1) // ym2 | zm4 | xm4 | ym3
faddp %st(0),%st(3) // zm4 | xm4 | ym4
fcoms Lfp_near_clip
fnstsw %ax
testb $1,%ah
jz LNoClip
fstp %st(0)
flds Lfp_near_clip
LNoClip:
fdivrs C(float_1) // lzi0 | x | y
fxch %st(1) // x | lzi0 | y
// // FIXME: build x/yscale into transform?
// scale = xscale * lzi0;
// u0 = (xcenter + scale*transformed[0]);
flds C(xscale) // xscale | x | lzi0 | y
fmul %st(2),%st(0) // scale | x | lzi0 | y
fmulp %st(0),%st(1) // scale*x | lzi0 | y
fadds C(xcenter) // u0 | lzi0 | y
// if (u0 < r_refdef.fvrectx_adj)
// u0 = r_refdef.fvrectx_adj;
// if (u0 > r_refdef.fvrectright_adj)
// u0 = r_refdef.fvrectright_adj;
// FIXME: use integer compares of floats?
fcoms C(r_refdef)+rd_fvrectx_adj
fnstsw %ax
testb $1,%ah
jz LClampP0
fstp %st(0)
flds C(r_refdef)+rd_fvrectx_adj
LClampP0:
fcoms C(r_refdef)+rd_fvrectright_adj
fnstsw %ax
testb $0x45,%ah
jnz LClampP1
fstp %st(0)
flds C(r_refdef)+rd_fvrectright_adj
LClampP1:
fld %st(1) // lzi0 | u0 | lzi0 | y
// scale = yscale * lzi0;
// v0 = (ycenter - scale*transformed[1]);
fmuls C(yscale) // scale | u0 | lzi0 | y
fmulp %st(0),%st(3) // u0 | lzi0 | scale*y
fxch %st(2) // scale*y | lzi0 | u0
fsubrs C(ycenter) // v0 | lzi0 | u0
// if (v0 < r_refdef.fvrecty_adj)
// v0 = r_refdef.fvrecty_adj;
// if (v0 > r_refdef.fvrectbottom_adj)
// v0 = r_refdef.fvrectbottom_adj;
// FIXME: use integer compares of floats?
fcoms C(r_refdef)+rd_fvrecty_adj
fnstsw %ax
testb $1,%ah
jz LClampP2
fstp %st(0)
flds C(r_refdef)+rd_fvrecty_adj
LClampP2:
fcoms C(r_refdef)+rd_fvrectbottom_adj
fnstsw %ax
testb $0x45,%ah
jnz LClampP3
fstp %st(0)
flds C(r_refdef)+rd_fvrectbottom_adj
LClampP3:
ret
#endif // USE_INTEL_ASM
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif