Fix asm linking in mingw.

It seems that recent versions of binutils/libtool don't like exporting
symbols that don't begin with an underscore (_).
This commit is contained in:
Bill Currie 2011-09-10 17:49:14 +09:00
parent 1f4a6daafb
commit 8e30d3098e
9 changed files with 488 additions and 486 deletions

View file

@ -112,17 +112,17 @@ C(D_DrawSpans8):
//
// TODO: any overlap from rearranging?
flds C(d_sdivzstepu)
fmuls fp_8
fmuls C(fp_8)
movl C(cacheblock),%edx
flds C(d_tdivzstepu)
fmuls fp_8
fmuls C(fp_8)
movl pspans(%esp),%ebx // point to the first span descriptor
flds C(d_zistepu)
fmuls fp_8
movl %edx,pbase // pbase = cacheblock
fstps zi8stepu
fstps tdivz8stepu
fstps sdivz8stepu
fmuls C(fp_8)
movl %edx,C(pbase) // pbase = cacheblock
fstps C(zi8stepu)
fstps C(tdivz8stepu)
fstps C(sdivz8stepu)
LSpanLoop:
//
@ -175,7 +175,7 @@ LSpanLoop:
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
flds C(fp_64k) // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
// du*d_zistepu; stays in %st(0) at end
@ -190,7 +190,7 @@ LSpanLoop:
//
movl C(d_viewbuffer),%ecx
movl espan_t_v(%ebx),%eax
movl %ebx,pspantemp // preserve spans pointer
movl %ebx,C(pspantemp) // preserve spans pointer
movl C(tadjust),%edx
movl C(sadjust),%esi
@ -208,7 +208,7 @@ LSpanLoop:
decl %ecx
jz LCleanup1 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
movl %ecx,C(spancountminus1)
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
@ -218,10 +218,10 @@ LSpanLoop:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
fildl spancountminus1
fildl C(spancountminus1)
flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1
flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
@ -238,7 +238,7 @@ LSpanLoop:
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
faddp %st(0),%st(3)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight1
@ -252,8 +252,8 @@ LCleanup1:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
jmp LFDIVInFlight1
.align 4
@ -266,41 +266,41 @@ LSetupNotLast1:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
fadds zi8stepu
fadds C(zi8stepu)
fxch %st(2)
fadds sdivz8stepu
fadds C(sdivz8stepu)
fxch %st(2)
flds tdivz8stepu
flds C(tdivz8stepu)
faddp %st(0),%st(2)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight1:
addl s,%esi
addl t,%edx
addl C(s),%esi
addl C(t),%edx
movl C(bbextents),%ebx
movl C(bbextentt),%ebp
cmpl %ebx,%esi
ja LClampHighOrLow0
LClampReentry0:
movl %esi,s
movl pbase,%ebx
movl %esi,C(s)
movl C(pbase),%ebx
shll $16,%esi
cmpl %ebp,%edx
movl %esi,sfracf
movl %esi,C(sfracf)
ja LClampHighOrLow1
LClampReentry1:
movl %edx,t
movl s,%esi // sfrac = scans->sfrac;
movl %edx,C(t)
movl C(s),%esi // sfrac = scans->sfrac;
shll $16,%edx
movl t,%eax // tfrac = scans->tfrac;
movl C(t),%eax // tfrac = scans->tfrac;
sarl $16,%esi
movl %edx,tfracf
movl %edx,C(tfracf)
//
// calculate the texture starting address
@ -335,15 +335,15 @@ LNotLastSegment:
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movl snext,%eax
movl tnext,%edx
fistpl C(snext)
fistpl C(tnext)
movl C(snext),%eax
movl C(tnext),%edx
movb (%esi),%bl // get first source texel
subl $8,%ecx // count off this segments' pixels
movl C(sadjust),%ebp
movl %ecx,counttemp // remember count of remaining pixels
movl %ecx,C(counttemp) // remember count of remaining pixels
movl C(tadjust),%ecx
movb %bl,(%edi) // store first dest pixel
@ -366,11 +366,11 @@ LClampReentry2:
ja LClampHigh3
LClampReentry3:
movl %ebp,snext
movl %ecx,tnext
movl %ebp,C(snext)
movl %ecx,C(tnext)
subl s,%ebp
subl t,%ecx
subl C(s),%ebp
subl C(t),%ecx
//
// set up advancetable
@ -392,61 +392,61 @@ LSetUp1:
addl %edx,%eax // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%edx
movl %eax,advancetable+4 // advance base in t
movl C(tfracf),%edx
movl %eax,C(advancetable)+4 // advance base in t
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $13,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
movl C(sfracf),%ebx
shll $13,%ecx // left-justify tstep fractional part
movl %eax,advancetable // advance extra in t
movl %eax,C(advancetable) // advance extra in t
movl %ecx,tstep
movl %ecx,C(tstep)
addl %ecx,%edx // advance tfrac fractional part by tstep frac
sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none)
addl %ebp,%ebx // advance sfrac fractional part by sstep frac
adcl advancetable+4(,%ecx,4),%esi // point to next source texel
adcl C(advancetable)+4(,%ecx,4),%esi // point to next source texel
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb (%esi),%al
addl %ebp,%ebx
movb %al,1(%edi)
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,2(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,3(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
//
// start FDIV for end of next segment in flight, so it can overlap
//
movl counttemp,%ecx
movl C(counttemp),%ecx
cmpl $8,%ecx // more than one segment after this?
ja LSetupNotLast2 // yes
decl %ecx
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
fildl spancountminus1
movl %ecx,C(spancountminus1)
fildl C(spancountminus1)
flds C(d_zistepu) // C(d_zistepu) | spancountminus1
fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1
@ -458,7 +458,7 @@ LSetUp1:
fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1
flds fp_64k // 64k | C(d_sdivzstepu)*scm1
flds C(fp_64k) // 64k | C(d_sdivzstepu)*scm1
fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k
faddp %st(0),%st(4) // 64k
@ -468,49 +468,49 @@ LSetUp1:
.align 4
LSetupNotLast2:
fadds zi8stepu
fadds C(zi8stepu)
fxch %st(2)
fadds sdivz8stepu
fadds C(sdivz8stepu)
fxch %st(2)
flds tdivz8stepu
flds C(tdivz8stepu)
faddp %st(0),%st(2)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight2:
movl %ecx,counttemp
movl %ecx,C(counttemp)
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,4(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,5(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,6(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl $8,%edi
movl %edx,tfracf
movl snext,%edx
movl %ebx,sfracf
movl tnext,%ebx
movl %edx,s
movl %ebx,t
movl %edx,C(tfracf)
movl C(snext),%edx
movl %ebx,C(sfracf)
movl C(tnext),%ebx
movl %edx,C(s)
movl %ebx,C(t)
movl counttemp,%ecx // retrieve count
movl C(counttemp),%ecx // retrieve count
//
// determine whether last span or not
@ -540,16 +540,16 @@ LLastSegment:
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
fistpl C(snext)
fistpl C(tnext)
movb (%esi),%al // load first texel in segment
movl C(tadjust),%ebx
movb %al,(%edi) // store first pixel in segment
movl C(sadjust),%eax
addl snext,%eax
addl tnext,%ebx
addl C(snext),%eax
addl C(tnext),%ebx
movl C(bbextents),%ebp
movl C(bbextentt),%edx
@ -559,7 +559,7 @@ LLastSegment:
cmpl %ebp,%eax
ja LClampHigh4
LClampReentry4:
movl %eax,snext
movl %eax,C(snext)
cmpl $2048,%ebx
jl LClampLow5
@ -570,25 +570,25 @@ LClampReentry5:
cmpl $1,%ecx // don't bother
je LOnlyOneStep // if two pixels in segment, there's only one step,
// of the segment length
subl s,%eax
subl t,%ebx
subl C(s),%eax
subl C(t),%ebx
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
addl %ebx,%ebx // reciprocal yields 16.48
imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
imull C(reciprocal_table)-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
movl %edx,%ebp
movl %ebx,%eax
imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
imull C(reciprocal_table)-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
movl entryvec_table(,%ecx,4),%ebx
movl C(entryvec_table)(,%ecx,4),%ebx
movl %edx,%eax
movl %ebx,jumptemp // entry point into code for RET later
movl %ebx,C(jumptemp) // entry point into code for RET later
movl %ebp,%ecx
sarl $16,%edx // tstep >>= 16;
movl C(cachewidth),%ebx
@ -597,23 +597,23 @@ LSetEntryvec:
addl %ecx,%edx // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%ecx
movl %edx,advancetable+4 // advance base in t
movl C(tfracf),%ecx
movl %edx,C(advancetable)+4 // advance base in t
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $16,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
movl C(sfracf),%ebx
shll $16,%eax // left-justify tstep fractional part
movl %edx,advancetable // advance extra in t
movl %edx,C(advancetable) // advance extra in t
movl %eax,tstep
movl %eax,C(tstep)
movl %ecx,%edx
addl %eax,%edx
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
jmp *jumptemp // jump to the number-of-pixels handler
jmp *C(jumptemp) // jump to the number-of-pixels handler
//----------------------------------------
@ -624,128 +624,128 @@ LNoSteps:
LOnlyOneStep:
subl s,%eax
subl t,%ebx
subl C(s),%eax
subl C(t),%ebx
movl %eax,%ebp
movl %ebx,%edx
jmp LSetEntryvec
//----------------------------------------
.globl Entry2_8
Entry2_8:
.globl C(Entry2_8)
C(Entry2_8):
subl $6,%edi // adjust for hardwired offsets
movb (%esi),%al
jmp LLEntry2_8
//----------------------------------------
.globl Entry3_8
Entry3_8:
.globl C(Entry3_8)
C(Entry3_8):
subl $5,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
jmp LLEntry3_8
//----------------------------------------
.globl Entry4_8
Entry4_8:
.globl C(Entry4_8)
C(Entry4_8):
subl $4,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
jmp LLEntry4_8
//----------------------------------------
.globl Entry5_8
Entry5_8:
.globl C(Entry5_8)
C(Entry5_8):
subl $3,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
jmp LLEntry5_8
//----------------------------------------
.globl Entry6_8
Entry6_8:
.globl C(Entry6_8)
C(Entry6_8):
subl $2,%edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
jmp LLEntry6_8
//----------------------------------------
.globl Entry7_8
Entry7_8:
.globl C(Entry7_8)
C(Entry7_8):
decl %edi // adjust for hardwired offsets
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
jmp LLEntry7_8
//----------------------------------------
.globl Entry8_8
Entry8_8:
.globl C(Entry8_8)
C(Entry8_8):
addl %eax,%edx
movb (%esi),%al
sbbl %ecx,%ecx
addl %ebp,%ebx
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
addl tstep,%edx
addl C(tstep),%edx
sbbl %ecx,%ecx
movb %al,1(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
LLEntry7_8:
sbbl %ecx,%ecx
movb %al,2(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
LLEntry6_8:
sbbl %ecx,%ecx
movb %al,3(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
LLEntry5_8:
sbbl %ecx,%ecx
movb %al,4(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
addl tstep,%edx
adcl C(advancetable)+4(,%ecx,4),%esi
addl C(tstep),%edx
LLEntry4_8:
sbbl %ecx,%ecx
movb %al,5(%edi)
addl %ebp,%ebx
movb (%esi),%al
adcl advancetable+4(,%ecx,4),%esi
adcl C(advancetable)+4(,%ecx,4),%esi
LLEntry3_8:
movb %al,6(%edi)
movb (%esi),%al
@ -760,7 +760,7 @@ LEndSpan:
fstp %st(0)
fstp %st(0)
movl pspantemp,%ebx // restore spans pointer
movl C(pspantemp),%ebx // restore spans pointer
movl espan_t_pnext(%ebx),%ebx // point to next span
testl %ebx,%ebx // any more spans?
movb %al,7(%edi)
@ -811,10 +811,10 @@ C(D_DrawZSpans):
testl %eax,%eax
jz LFNegSpan
fmuls Float2ToThe31nd
fistpl izistep // note: we are relying on FP exceptions being turned
fmuls C(Float2ToThe31nd)
fistpl C(izistep) // note: we are relying on FP exceptions being turned
// off here to avoid range problems
movl izistep,%ebx // remains loaded for all spans
movl C(izistep),%ebx // remains loaded for all spans
LFSpanLoop:
// set up the initial 1/z value
@ -831,7 +831,7 @@ LFSpanLoop:
faddp %st(0),%st(1)
// clamp if z is nearer than 2 (1/z > 0.5)
fcoms float_point5
fcoms C(float_point5)
addl %ecx,%edi
movl espan_t_u(%esi),%edx
addl %edx,%edx // word count
@ -842,11 +842,11 @@ LFSpanLoop:
testb $0x45,%ah
jz LClamp
fmuls Float2ToThe31nd
fistpl izi // note: we are relying on FP exceptions being turned
fmuls C(Float2ToThe31nd)
fistpl C(izi) // note: we are relying on FP exceptions being turned
// off here to avoid problems when the span is closer
// than 1/(2**31)
movl izi,%edx
movl C(izi),%edx
// at this point:
// %ebx = izistep
@ -926,10 +926,10 @@ LFSpanDone:
jmp LFDone
LFNegSpan:
fmuls FloatMinus2ToThe31nd
fistpl izistep // note: we are relying on FP exceptions being turned
fmuls C(FloatMinus2ToThe31nd)
fistpl C(izistep) // note: we are relying on FP exceptions being turned
// off here to avoid range problems
movl izistep,%ebx // remains loaded for all spans
movl C(izistep),%ebx // remains loaded for all spans
LFNegSpanLoop:
// set up the initial 1/z value
@ -946,7 +946,7 @@ LFNegSpanLoop:
faddp %st(0),%st(1)
// clamp if z is nearer than 2 (1/z > 0.5)
fcoms float_point5
fcoms C(float_point5)
addl %ecx,%edi
movl espan_t_u(%esi),%edx
addl %edx,%edx // word count
@ -957,11 +957,11 @@ LFNegSpanLoop:
testb $0x45,%ah
jz LClampNeg
fmuls Float2ToThe31nd
fistpl izi // note: we are relying on FP exceptions being turned
fmuls C(Float2ToThe31nd)
fistpl C(izi) // note: we are relying on FP exceptions being turned
// off here to avoid problems when the span is closer
// than 1/(2**31)
movl izi,%edx
movl C(izi),%edx
// at this point:
// %ebx = izistep

View file

@ -89,12 +89,12 @@ C(D_DrawParticle):
faddp %st(0),%st(1) // z | local[0] | local[1] | local[2]
fld %st(0) // z | z | local[0] | local[1] |
// local[2]
fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2]
fdivrs C(float_1) // 1/z | z | local[0] | local[1] | local[2]
fxch %st(1) // z | 1/z | local[0] | local[1] | local[2]
// if (transformed[2] < PARTICLE_Z_CLIP)
// return;
fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2]
fcomps C(float_particle_z_clip) // 1/z | local[0] | local[1] | local[2]
fxch %st(3) // local[2] | local[0] | local[1] | 1/z
flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z
@ -141,20 +141,20 @@ C(D_DrawParticle):
fadds C(xcenter) // u | v | 1/z
// FIXME: preadjust xcenter and ycenter
fxch %st(1) // v | u | 1/z
fadds float_point5 // v | u | 1/z
fadds C(float_point5) // v | u | 1/z
fxch %st(1) // u | v | 1/z
fadds float_point5 // u | v | 1/z
fadds C(float_point5) // u | v | 1/z
fxch %st(2) // 1/z | v | u
fmuls DP_32768 // 1/z * 0x8000 | v | u
fmuls C(DP_32768) // 1/z * 0x8000 | v | u
fxch %st(2) // u | v | 1/z * 0x8000
// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
fistpl DP_u // v | 1/z * 0x8000
fistpl DP_v // 1/z * 0x8000
fistpl C(DP_u) // v | 1/z * 0x8000
fistpl C(DP_v) // 1/z * 0x8000
movl DP_u,%eax
movl DP_v,%edx
movl C(DP_u),%eax
movl C(DP_v),%edx
// if ((v > d_vrectbottom_particle) ||
// (u > d_vrectright_particle) ||
@ -179,7 +179,7 @@ C(D_DrawParticle):
jl LPop1AndDone
flds pt_color(%edi) // color | 1/z * 0x8000
fstps DP_Color // 1/z * 0x8000
fstps C(DP_Color) // 1/z * 0x8000
movl C(d_viewbuffer),%ebx
@ -191,17 +191,17 @@ C(D_DrawParticle):
leal (%edx,%eax,2),%edx
movl C(d_pzbuffer),%eax
fistpl izi
fistpl C(izi)
addl %ebx,%edi
addl %eax,%edx
// pix = izi >> d_pix_shift;
movl izi,%eax
movl C(izi),%eax
movl C(d_pix_shift),%ecx
shrl %cl,%eax
movl izi,%ebp
movl C(izi),%ebp
// if (pix < d_pix_min)
// pix = d_pix_min;
@ -221,7 +221,7 @@ LTestPixMax:
movl %ecx,%eax
LTestDone:
movb DP_Color,%ch
movb C(DP_Color),%ch
movl C(d_y_aspect_shift),%ebx
testl %ebx,%ebx
@ -230,11 +230,11 @@ LTestDone:
cmpl $4,%eax
ja LDefault
jmp *DP_EntryTable-4(,%eax,4)
jmp *C(DP_EntryTable)-4(,%eax,4)
// 1x1
.globl DP_1x1
DP_1x1:
.globl C(DP_1x1)
C(DP_1x1):
cmpw %bp,(%edx) // just one pixel to do
jg LDone
movw %bp,(%edx)
@ -242,8 +242,8 @@ DP_1x1:
jmp LDone
// 2x2
.globl DP_2x2
DP_2x2:
.globl C(DP_2x2)
C(DP_2x2):
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
@ -273,8 +273,8 @@ L2x2_4:
jmp LDone
// 3x3
.globl DP_3x3
DP_3x3:
.globl C(DP_3x3)
C(DP_3x3):
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
@ -332,8 +332,8 @@ L3x3_9:
// 4x4
.globl DP_4x4
DP_4x4:
.globl C(DP_4x4)
C(DP_4x4):
pushl %esi
movl C(screenwidth),%ebx
movl C(d_zrowbytes),%esi
@ -434,7 +434,7 @@ LDefault:
// count = pix << d_y_aspect_shift;
movl %eax,%ebx
movl %eax,DP_Pix
movl %eax,C(DP_Pix)
movb C(d_y_aspect_shift),%cl
shll %cl,%ebx
@ -451,7 +451,7 @@ LDefault:
// }
LGenRowLoop:
movl DP_Pix,%eax
movl C(DP_Pix),%eax
LGenColLoop:
cmpw %bp,-2(%edx,%eax,2)

View file

@ -183,7 +183,7 @@ C(D_PolysetCalcGradients):
// t1*p00_minus_p20 - t0*p10_minus_p20 |
// t1*p01_minus_p21 - t0*p11_minus_p21 |
// xstepdenominv | p00_minus_p20 | p11_minus_p21
fmuls float_minus_1 // ystepdenominv |
fmuls C(float_minus_1) // ystepdenominv |
// t1*p00_minus_p20 - t0*p10_minus_p20 |
// t1*p01_minus_p21 - t0*p11_minus_p21 |
// xstepdenominv | p00_minus_p20 | p11_minus_p21
@ -205,12 +205,12 @@ C(D_PolysetCalcGradients):
// (t1*p01_minus_p21 - t0*p11_minus_p21)*
// xstepdenominv | ystepdenominv |
// xstepdenominv | p00_minus_p20 | p11_minus_p21
fldcw r_ceil_cw
fldcw C(r_ceil_cw)
fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv |
// p00_minus_p20 | p11_minus_p21
fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 |
// p11_minus_p21
fldcw r_single_cw
fldcw C(r_single_cw)
// t0 = r_p0[2] - r_p2[2];
// t1 = r_p1[2] - r_p2[2];
@ -822,16 +822,16 @@ LRightEdgeStepped:
movl C(a_ststepxwhole),%ecx
movl C(r_affinetridesc)+atd_skinwidth,%edx
movl %ecx,advancetable+4 // advance base in t
movl %ecx,C(advancetable)+4 // advance base in t
addl %edx,%ecx
movl %ecx,advancetable // advance extra in t
movl %ecx,C(advancetable) // advance extra in t
movl C(a_tstepxfrac),%ecx
movw C(r_lstepx),%cx
movl %eax,%edx // count
movl %ecx,tstep
movl %ecx,C(tstep)
addl $7,%edx
shrl $3,%edx // count of full and partial loops
@ -889,12 +889,12 @@ LDraw8:
LPatch8:
movb %al,(%edi)
Lp1:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw7:
cmpw 2(%ecx),%bp
@ -907,12 +907,12 @@ LDraw7:
LPatch7:
movb %al,1(%edi)
Lp2:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw6:
cmpw 4(%ecx),%bp
@ -925,12 +925,12 @@ LDraw6:
LPatch6:
movb %al,2(%edi)
Lp3:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw5:
cmpw 6(%ecx),%bp
@ -943,12 +943,12 @@ LDraw5:
LPatch5:
movb %al,3(%edi)
Lp4:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw4:
cmpw 8(%ecx),%bp
@ -961,12 +961,12 @@ LDraw4:
LPatch4:
movb %al,4(%edi)
Lp5:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw3:
cmpw 10(%ecx),%bp
@ -979,12 +979,12 @@ LDraw3:
LPatch3:
movb %al,5(%edi)
Lp6:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw2:
cmpw 12(%ecx),%bp
@ -997,12 +997,12 @@ LDraw2:
LPatch2:
movb %al,6(%edi)
Lp7:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
LDraw1:
cmpw 14(%ecx),%bp
@ -1015,12 +1015,12 @@ LDraw1:
LPatch1:
movb %al,7(%edi)
Lp8:
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl lzistepx,%ebp
adcl $0,%ebp
addl C(a_sstepxfrac),%ebx
adcl advancetable+4(,%eax,4),%esi
adcl C(advancetable)+4(,%eax,4),%esi
addl $8,%edi
addl $16,%ecx
@ -1652,7 +1652,7 @@ LNDLoop:
movl %eax,C(r_p0)+16
movl %esi,C(r_p0)+20
fdivrs float_1
fdivrs C(float_1)
// r_p1[0] = index1->v[0];
// r_p1[1] = index1->v[1];

View file

@ -108,25 +108,25 @@ C(D_SpriteDrawSpans):
//
// FIXME: any overlap from rearranging?
flds C(d_sdivzstepu)
fmuls fp_8
fmuls C(fp_8)
movl C(cacheblock),%edx
flds C(d_tdivzstepu)
fmuls fp_8
fmuls C(fp_8)
movl pspans(%esp),%ebx // point to the first span descriptor
flds C(d_zistepu)
fmuls fp_8
movl %edx,pbase // pbase = cacheblock
fmuls C(fp_8)
movl %edx,C(pbase) // pbase = cacheblock
flds C(d_zistepu)
fmuls fp_64kx64k
fmuls C(fp_64kx64k)
fxch %st(3)
fstps sdivz8stepu
fstps zi8stepu
fstps tdivz8stepu
fistpl izistep
movl izistep,%eax
fstps C(sdivz8stepu)
fstps C(zi8stepu)
fstps C(tdivz8stepu)
fistpl C(izistep)
movl C(izistep),%eax
rorl $16,%eax // put upper 16 bits in low word
movl sspan_t_count(%ebx),%ecx
movl %eax,izistep
movl %eax,C(izistep)
cmpl $0,%ecx
jle LNextSpan
@ -183,14 +183,14 @@ LSpanLoop:
fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z
faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z
flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
flds C(fp_64k) // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv +
// du*d_zistepu; stays in %st(0) at end
// 1/z | fp_64k | t/z | s/z
fld %st(0) // FIXME: get rid of stall on FMUL?
fmuls fp_64kx64k
fmuls C(fp_64kx64k)
fxch %st(1)
//
@ -199,21 +199,21 @@ LSpanLoop:
fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z
fxch %st(1)
fistpl izi // 0.32 fixed-point 1/z
movl izi,%ebp
fistpl C(izi) // 0.32 fixed-point 1/z
movl C(izi),%ebp
//
// set pz to point to the first z-buffer pixel in the span
//
rorl $16,%ebp // put upper 16 bits in low word
movl sspan_t_v(%ebx),%eax
movl %ebp,izi
movl %ebp,C(izi)
movl sspan_t_u(%ebx),%ebp
imull C(d_zrowbytes)
shll $1,%ebp // a word per pixel
addl C(d_pzbuffer),%eax
addl %ebp,%eax
movl %eax,pz
movl %eax,C(pz)
//
// point %edi to the first pixel in the span
@ -236,7 +236,7 @@ LSpanLoop:
decl %ecx
jz LCleanup1 // if only one pixel, no need to start an FDIV
movl %ecx,spancountminus1
movl %ecx,C(spancountminus1)
// finish up the s and t calcs
fxch %st(1) // z*64k | 1/z | t/z | s/z
@ -246,10 +246,10 @@ LSpanLoop:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
fildl spancountminus1
fildl C(spancountminus1)
flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1
flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1
@ -266,7 +266,7 @@ LSpanLoop:
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
faddp %st(0),%st(3)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // this is what we've gone to all this trouble to
// overlap
jmp LFDIVInFlight1
@ -280,8 +280,8 @@ LCleanup1:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
jmp LFDIVInFlight1
.align 4
@ -294,41 +294,41 @@ LSetupNotLast1:
fxch %st(1) // z*64k | s | 1/z | t/z | s/z
fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z
fxch %st(1) // s | t | 1/z | t/z | s/z
fistpl s // 1/z | t | t/z | s/z
fistpl t // 1/z | t/z | s/z
fistpl C(s) // 1/z | t | t/z | s/z
fistpl C(t) // 1/z | t/z | s/z
fadds zi8stepu
fadds C(zi8stepu)
fxch %st(2)
fadds sdivz8stepu
fadds C(sdivz8stepu)
fxch %st(2)
flds tdivz8stepu
flds C(tdivz8stepu)
faddp %st(0),%st(2)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
LFDIVInFlight1:
addl s,%esi
addl t,%edx
addl C(s),%esi
addl C(t),%edx
movl C(bbextents),%ebx
movl C(bbextentt),%ebp
cmpl %ebx,%esi
ja LClampHighOrLow0
LClampReentry0:
movl %esi,s
movl pbase,%ebx
movl %esi,C(s)
movl C(pbase),%ebx
shll $16,%esi
cmpl %ebp,%edx
movl %esi,sfracf
movl %esi,C(sfracf)
ja LClampHighOrLow1
LClampReentry1:
movl %edx,t
movl s,%esi // sfrac = scans->sfrac;
movl %edx,C(t)
movl C(s),%esi // sfrac = scans->sfrac;
shll $16,%edx
movl t,%eax // tfrac = scans->tfrac;
movl C(t),%eax // tfrac = scans->tfrac;
sarl $16,%esi
movl %edx,tfracf
movl %edx,C(tfracf)
//
// calculate the texture starting address
@ -362,10 +362,10 @@ LNotLastSegment:
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
movl snext,%eax
movl tnext,%edx
fistpl C(snext)
fistpl C(tnext)
movl C(snext),%eax
movl C(tnext),%edx
subl $8,%ecx // count off this segments' pixels
movl C(sadjust),%ebp
@ -390,11 +390,11 @@ LClampReentry2:
ja LClampHigh3
LClampReentry3:
movl %ebp,snext
movl %ecx,tnext
movl %ebp,C(snext)
movl %ecx,C(tnext)
subl s,%ebp
subl t,%ecx
subl C(s),%ebp
subl C(t),%ecx
//
// set up advancetable
@ -409,19 +409,19 @@ LClampReentry3:
LIsZero:
addl %edx,%eax // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%edx
movl %eax,advancetable+4 // advance base in t
movl C(tfracf),%edx
movl %eax,C(advancetable)+4 // advance base in t
addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $13,%ebp // left-justify sstep fractional part
movl %ebp,sstep
movl sfracf,%ebx
movl %ebp,C(sstep)
movl C(sfracf),%ebx
shll $13,%ecx // left-justify tstep fractional part
movl %eax,advancetable // advance extra in t
movl %ecx,tstep
movl %eax,C(advancetable) // advance extra in t
movl %ecx,C(tstep)
movl pz,%ecx
movl izi,%ebp
movl C(pz),%ecx
movl C(izi),%ebp
cmpw (%ecx),%bp
jl Lp1
@ -431,13 +431,13 @@ LIsZero:
movw %bp,(%ecx)
movb %al,(%edi) // store first dest pixel
Lp1:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx // advance tfrac fractional part by tstep frac
addl C(tstep),%edx // advance tfrac fractional part by tstep frac
sbbl %eax,%eax // turn tstep carry into -1 (0 if none)
addl sstep,%ebx // advance sfrac fractional part by sstep frac
adcl advancetable+4(,%eax,4),%esi // point to next source texel
addl C(sstep),%ebx // advance sfrac fractional part by sstep frac
adcl C(advancetable)+4(,%eax,4),%esi // point to next source texel
cmpw 2(%ecx),%bp
jl Lp2
@ -447,12 +447,12 @@ Lp1:
movw %bp,2(%ecx)
movb %al,1(%edi)
Lp2:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
cmpw 4(%ecx),%bp
jl Lp3
@ -462,12 +462,12 @@ Lp2:
movw %bp,4(%ecx)
movb %al,2(%edi)
Lp3:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
cmpw 6(%ecx),%bp
jl Lp4
@ -477,12 +477,12 @@ Lp3:
movw %bp,6(%ecx)
movb %al,3(%edi)
Lp4:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
cmpw 8(%ecx),%bp
jl Lp5
@ -492,12 +492,12 @@ Lp4:
movw %bp,8(%ecx)
movb %al,4(%edi)
Lp5:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
//
// start FDIV for end of next segment in flight, so it can overlap
@ -508,8 +508,8 @@ Lp5:
decl %eax
jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV
movl %eax,spancountminus1
fildl spancountminus1
movl %eax,C(spancountminus1)
fildl C(spancountminus1)
flds C(d_zistepu) // _d_zistepu | spancountminus1
fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1
@ -521,7 +521,7 @@ Lp5:
fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
faddp %st(0),%st(3) // _d_sdivzstepu*scm1
flds fp_64k // 64k | _d_sdivzstepu*scm1
flds C(fp_64k) // 64k | _d_sdivzstepu*scm1
fxch %st(1) // _d_sdivzstepu*scm1 | 64k
faddp %st(0),%st(4) // 64k
@ -531,13 +531,13 @@ Lp5:
.align 4
LSetupNotLast2:
fadds zi8stepu
fadds C(zi8stepu)
fxch %st(2)
fadds sdivz8stepu
fadds C(sdivz8stepu)
fxch %st(2)
flds tdivz8stepu
flds C(tdivz8stepu)
faddp %st(0),%st(2)
flds fp_64k
flds C(fp_64k)
fdiv %st(1),%st(0) // z = 1/1/z
// this is what we've gone to all this trouble to
// overlap
@ -552,12 +552,12 @@ LFDIVInFlight2:
movw %bp,10(%ecx)
movb %al,5(%edi)
Lp6:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
cmpw 12(%ecx),%bp
jl Lp7
@ -567,12 +567,12 @@ Lp6:
movw %bp,12(%ecx)
movb %al,6(%edi)
Lp7:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
cmpw 14(%ecx),%bp
jl Lp8
@ -582,24 +582,24 @@ Lp7:
movw %bp,14(%ecx)
movb %al,7(%edi)
Lp8:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
addl $8,%edi
addl $16,%ecx
movl %edx,tfracf
movl snext,%edx
movl %ebx,sfracf
movl tnext,%ebx
movl %edx,s
movl %ebx,t
movl %edx,C(tfracf)
movl C(snext),%edx
movl %ebx,C(sfracf)
movl C(tnext),%ebx
movl %edx,C(s)
movl %ebx,C(t)
movl %ecx,pz
movl %ebp,izi
movl %ecx,C(pz)
movl %ebp,C(izi)
popl %ecx // retrieve count
@ -630,14 +630,14 @@ LLastSegment:
fxch %st(1)
fmul %st(3),%st(0) // t = t/z * z
fxch %st(1)
fistpl snext
fistpl tnext
fistpl C(snext)
fistpl C(tnext)
movl C(tadjust),%ebx
movl C(sadjust),%eax
addl snext,%eax
addl tnext,%ebx
addl C(snext),%eax
addl C(tnext),%ebx
movl C(bbextents),%ebp
movl C(bbextentt),%edx
@ -647,7 +647,7 @@ LLastSegment:
cmpl %ebp,%eax
ja LClampHigh4
LClampReentry4:
movl %eax,snext
movl %eax,C(snext)
cmpl $2048,%ebx
jl LClampLow5
@ -658,22 +658,22 @@ LClampReentry5:
cmpl $1,%ecx // don't bother
je LOnlyOneStep // if two pixels in segment, there's only one step,
// of the segment length
subl s,%eax
subl t,%ebx
subl C(s),%eax
subl C(t),%ebx
addl %eax,%eax // convert to 15.17 format so multiply by 1.31
addl %ebx,%ebx // reciprocal yields 16.48
imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
imull C(reciprocal_table)-8(,%ecx,4) // sstep = (snext - s) / (spancount-1)
movl %edx,%ebp
movl %ebx,%eax
imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
imull C(reciprocal_table)-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
movl spr8entryvec_table(,%ecx,4),%ebx
movl C(spr8entryvec_table)(,%ecx,4),%ebx
movl %edx,%eax
pushl %ebx // entry point into code for RET later
movl %ebp,%ecx
@ -685,44 +685,44 @@ LSetEntryvec:
LIsZeroLast:
addl %ecx,%edx // add in sstep
// (tstep >> 16) * cachewidth + (sstep >> 16);
movl tfracf,%ecx
movl %edx,advancetable+4 // advance base in t
movl C(tfracf),%ecx
movl %edx,C(advancetable)+4 // advance base in t
addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth +
// (sstep >> 16);
shll $16,%ebp // left-justify sstep fractional part
movl sfracf,%ebx
movl C(sfracf),%ebx
shll $16,%eax // left-justify tstep fractional part
movl %edx,advancetable // advance extra in t
movl %edx,C(advancetable) // advance extra in t
movl %eax,tstep
movl %ebp,sstep
movl %eax,C(tstep)
movl %ebp,C(sstep)
movl %ecx,%edx
movl pz,%ecx
movl izi,%ebp
movl C(pz),%ecx
movl C(izi),%ebp
ret // jump to the number-of-pixels handler
//----------------------------------------
LNoSteps:
movl pz,%ecx
movl C(pz),%ecx
subl $7,%edi // adjust for hardwired offset
subl $14,%ecx
jmp LEndSpan
LOnlyOneStep:
subl s,%eax
subl t,%ebx
subl C(s),%eax
subl C(t),%ebx
movl %eax,%ebp
movl %ebx,%edx
jmp LSetEntryvec
//----------------------------------------
.globl Spr8Entry2_8
Spr8Entry2_8:
.globl C(Spr8Entry2_8)
C(Spr8Entry2_8):
subl $6,%edi // adjust for hardwired offsets
subl $12,%ecx
movb (%esi),%al
@ -730,48 +730,48 @@ Spr8Entry2_8:
//----------------------------------------
// Spr8Entry3_8 .. Spr8Entry7_8: entry stubs into the unrolled per-pixel
// sequence. Each stub backs %edi and %ecx off by a fixed amount and then jumps
// to the matching LLEntryN_8 label. The amounts equal the displacements used
// at that label (e.g. LLEntry3_8 works on 10(%ecx) and 5(%edi)), so the first
// pixel handled there lands on the unadjusted addresses.
// %edi moves 1 per slot (byte stores), %ecx moves 2 per slot (word accesses).
// NOTE(review): this text is a rendered diff; each bare-named line is the
// pre-change form and the C(...) line after it is its replacement.
.globl Spr8Entry3_8
Spr8Entry3_8:
.globl C(Spr8Entry3_8)
C(Spr8Entry3_8):
subl $5,%edi // adjust for hardwired offsets
subl $10,%ecx
jmp LLEntry3_8
//----------------------------------------
// Bias 4 / 8 matches the 4(%edi) and 8(%ecx) operands at LLEntry4_8.
.globl Spr8Entry4_8
Spr8Entry4_8:
.globl C(Spr8Entry4_8)
C(Spr8Entry4_8):
subl $4,%edi // adjust for hardwired offsets
subl $8,%ecx
jmp LLEntry4_8
//----------------------------------------
// Bias 3 / 6 matches the 3(%edi) and 6(%ecx) operands at LLEntry5_8.
.globl Spr8Entry5_8
Spr8Entry5_8:
.globl C(Spr8Entry5_8)
C(Spr8Entry5_8):
subl $3,%edi // adjust for hardwired offsets
subl $6,%ecx
jmp LLEntry5_8
//----------------------------------------
// Bias 2 / 4 matches the 2(%edi) and 4(%ecx) operands at LLEntry6_8.
.globl Spr8Entry6_8
Spr8Entry6_8:
.globl C(Spr8Entry6_8)
C(Spr8Entry6_8):
subl $2,%edi // adjust for hardwired offsets
subl $4,%ecx
jmp LLEntry6_8
//----------------------------------------
// Bias 1 / 2 matches the 1(%edi) and 2(%ecx) operands at LLEntry7_8.
.globl Spr8Entry7_8
Spr8Entry7_8:
.globl C(Spr8Entry7_8)
C(Spr8Entry7_8):
decl %edi // adjust for hardwired offsets
subl $2,%ecx
jmp LLEntry7_8
//----------------------------------------
.globl Spr8Entry8_8
Spr8Entry8_8:
.globl C(Spr8Entry8_8)
C(Spr8Entry8_8):
cmpw (%ecx),%bp
jl Lp9
movb (%esi),%al
@ -780,12 +780,12 @@ Spr8Entry8_8:
movw %bp,(%ecx)
movb %al,(%edi)
Lp9:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry7_8:
cmpw 2(%ecx),%bp
jl Lp10
@ -795,12 +795,12 @@ LLEntry7_8:
movw %bp,2(%ecx)
movb %al,1(%edi)
Lp10:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry6_8:
cmpw 4(%ecx),%bp
jl Lp11
@ -810,12 +810,12 @@ LLEntry6_8:
movw %bp,4(%ecx)
movb %al,2(%edi)
Lp11:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry5_8:
cmpw 6(%ecx),%bp
jl Lp12
@ -825,12 +825,12 @@ LLEntry5_8:
movw %bp,6(%ecx)
movb %al,3(%edi)
Lp12:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry4_8:
cmpw 8(%ecx),%bp
jl Lp13
@ -840,12 +840,12 @@ LLEntry4_8:
movw %bp,8(%ecx)
movb %al,4(%edi)
Lp13:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry3_8:
cmpw 10(%ecx),%bp
jl Lp14
@ -855,12 +855,12 @@ LLEntry3_8:
movw %bp,10(%ecx)
movb %al,5(%edi)
Lp14:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LLEntry2_8:
cmpw 12(%ecx),%bp
jl Lp15
@ -870,12 +870,12 @@ LLEntry2_8:
movw %bp,12(%ecx)
movb %al,6(%edi)
Lp15:
addl izistep,%ebp
addl C(izistep),%ebp
adcl $0,%ebp
addl tstep,%edx
addl C(tstep),%edx
sbbl %eax,%eax
addl sstep,%ebx
adcl advancetable+4(,%eax,4),%esi
addl C(sstep),%ebx
adcl C(advancetable)+4(,%eax,4),%esi
LEndSpan:
cmpw 14(%ecx),%bp

View file

@ -95,39 +95,39 @@ C(d_zwidth): .long 0
//-------------------------------------------------------
// ASM-only variables
//-------------------------------------------------------
// Zero-initialised 32-bit scratch cells exported for the assembly span
// drawers. In the sprite span code visible in this diff, s, t, snext, tnext,
// sfracf, tfracf, pz, izi, izistep, spancountminus1 and the *8stepu cells are
// read and written with plain movl/addl/fld/fist memory operands.
// NOTE(review): this text is a rendered diff; the bare-named declarations are
// the pre-change lines and the C(...) declarations are their replacements.
.globl izi
izi: .long 0
.globl C(izi)
C(izi): .long 0
.globl pbase, s, t, sfracf, tfracf, snext, tnext
.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu
.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz
s: .long 0
t: .long 0
snext: .long 0
tnext: .long 0
sfracf: .long 0
tfracf: .long 0
pbase: .long 0
zi8stepu: .long 0
sdivz8stepu: .long 0
tdivz8stepu: .long 0
zi16stepu: .long 0
sdivz16stepu: .long 0
tdivz16stepu: .long 0
spancountminus1: .long 0
pz: .long 0
.globl C(pbase), C(s), C(t), C(sfracf), C(tfracf), C(snext), C(tnext)
.globl C(spancountminus1), C(zi16stepu), C(sdivz16stepu), C(tdivz16stepu)
.globl C(zi8stepu), C(sdivz8stepu), C(tdivz8stepu), C(pz)
C(s): .long 0
C(t): .long 0
C(snext): .long 0
C(tnext): .long 0
C(sfracf): .long 0
C(tfracf): .long 0
C(pbase): .long 0
C(zi8stepu): .long 0
C(sdivz8stepu): .long 0
C(tdivz8stepu): .long 0
C(zi16stepu): .long 0
C(sdivz16stepu): .long 0
C(tdivz16stepu): .long 0
C(spancountminus1): .long 0
C(pz): .long 0
// izistep is added to %ebp once per pixel in the unrolled span code.
.globl izistep
izistep: .long 0
.globl C(izistep)
C(izistep): .long 0
//-------------------------------------------------------
// local variables for d_draw16.s
//-------------------------------------------------------
// reciprocal_table_16: 14 fixed-point reciprocals, 1/2 through 1/15.
// NOTE(review): this text is a rendered diff; the bare-named .globl/label
// lines are the pre-change form and the C(...) lines their replacements.
.globl reciprocal_table_16
.globl C(reciprocal_table_16)
// 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
// 1/14, and 1/15 in 0.32 form
// NOTE(review): the stored words equal fraction * 2^31 (0x40000000 = 1/2,
// 0x8888888 = 1/15), i.e. a 1.31 layout rather than 0.32 -- confirm against
// the code that multiplies by these entries.
reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
C(reciprocal_table_16): .long 0x40000000, 0x2aaaaaaa, 0x20000000
.long 0x19999999, 0x15555555, 0x12492492
.long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba
.long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888
@ -136,68 +136,70 @@ reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000
//-------------------------------------------------------
// local variables for d_parta.s
//-------------------------------------------------------
// Exported cells for the particle drawer: five zero-initialised longs, one
// single-precision constant (DP_32768 = 32768.0) and a four-entry address
// table.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable
DP_Count: .long 0
DP_u: .long 0
DP_v: .long 0
DP_32768: .single 32768.0
DP_Color: .long 0
DP_Pix: .long 0
.globl C(DP_Count), C(DP_u), C(DP_v), C(DP_32768), C(DP_Color)
.globl C(DP_Pix), C(DP_EntryTable)
C(DP_Count): .long 0
C(DP_u): .long 0
C(DP_v): .long 0
C(DP_32768): .single 32768.0
C(DP_Color): .long 0
C(DP_Pix): .long 0
// DP_1x1..DP_4x4 are defined in another object; they are declared .extern
// unless NeXT is defined.
#ifndef NeXT
.extern DP_1x1
.extern DP_2x2
.extern DP_3x3
.extern DP_4x4
.extern C(DP_1x1)
.extern C(DP_2x2)
.extern C(DP_3x3)
.extern C(DP_4x4)
#endif
// Entry i of the table holds the address of DP_(i+1)x(i+1).
DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4
C(DP_EntryTable): .long C(DP_1x1), C(DP_2x2), C(DP_3x3), C(DP_4x4)
//
// advancetable is 8 bytes, but points to the middle of that range so negative
// offsets will work
// In the span code shown in this diff the table is addressed as
// advancetable+4(,%eax,4) right after "sbbl %eax,%eax", so %eax is 0 or -1:
// index 0 reads the second long, index -1 reads the first.
// sstep/tstep are the per-pixel increments added to %ebx/%edx there.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
//
.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp
advancetable: .long 0, 0
sstep: .long 0
tstep: .long 0
.globl C(advancetable), C(sstep), C(tstep), C(pspantemp)
.globl C(counttemp), C(jumptemp)
C(advancetable): .long 0, 0
C(sstep): .long 0
C(tstep): .long 0
pspantemp: .long 0
counttemp: .long 0
jumptemp: .long 0
C(pspantemp): .long 0
C(counttemp): .long 0
C(jumptemp): .long 0
// 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
// NOTE(review): the stored words equal fraction * 2^31 (0x40000000 = 1/2),
// which matches the "multiply by 1.31 reciprocal" remark at the imull sites
// rather than a 0.32 layout -- confirm.
// The sprite span code indexes this as reciprocal_table-8(,%ecx,4), so
// %ecx == 2 selects the first entry.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
.globl reciprocal_table, entryvec_table
reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000
.globl C(reciprocal_table), C(entryvec_table)
C(reciprocal_table): .long 0x40000000, 0x2aaaaaaa, 0x20000000
.long 0x19999999, 0x15555555, 0x12492492
// entryvec_table: eight longs. Slot 0 is 0; slots 1..7 hold the addresses of
// Entry2_8..Entry8_8, which are defined in another object (declared .extern
// unless NeXT is defined).
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
#ifndef NeXT
.extern Entry2_8
.extern Entry3_8
.extern Entry4_8
.extern Entry5_8
.extern Entry6_8
.extern Entry7_8
.extern Entry8_8
.extern C(Entry2_8)
.extern C(Entry3_8)
.extern C(Entry4_8)
.extern C(Entry5_8)
.extern C(Entry6_8)
.extern C(Entry7_8)
.extern C(Entry8_8)
#endif
entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8
.long Entry5_8, Entry6_8, Entry7_8, Entry8_8
C(entryvec_table): .long 0, C(Entry2_8), C(Entry3_8), C(Entry4_8)
.long C(Entry5_8), C(Entry6_8), C(Entry7_8), C(Entry8_8)
// spr8entryvec_table: eight longs. Slot 0 is 0; slots 1..7 hold the addresses
// of Spr8Entry2_8..Spr8Entry8_8. The sprite span code loads
// spr8entryvec_table(,%ecx,4), pushes the value and later reaches it with
// "ret", so the table acts as a jump vector indexed by %ecx.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
#ifndef NeXT
.extern Spr8Entry2_8
.extern Spr8Entry3_8
.extern Spr8Entry4_8
.extern Spr8Entry5_8
.extern Spr8Entry6_8
.extern Spr8Entry7_8
.extern Spr8Entry8_8
.extern C(Spr8Entry2_8)
.extern C(Spr8Entry3_8)
.extern C(Spr8Entry4_8)
.extern C(Spr8Entry5_8)
.extern C(Spr8Entry6_8)
.extern C(Spr8Entry7_8)
.extern C(Spr8Entry8_8)
#endif
.globl spr8entryvec_table
spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8
.long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8
.globl C(spr8entryvec_table)
C(spr8entryvec_table): .long 0, C(Spr8Entry2_8), C(Spr8Entry3_8), C(Spr8Entry4_8)
.long C(Spr8Entry5_8), C(Spr8Entry6_8), C(Spr8Entry7_8), C(Spr8Entry8_8)
#endif // USE_INTEL_ASM

View file

@ -97,30 +97,30 @@ F_END(unmaskexceptions)
// Storage for x87 control-word values: five exported, zero-initialised longs.
// R_SetFPCW fills cw, full_cw, single_cw and ceil_cw; R_LowFPPrecision and
// R_HighFPPrecision load single_cw / full_cw with fldcw.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
.data
.align 4
.globl ceil_cw, single_cw, full_cw, cw, pushed_cw
ceil_cw: .long 0
single_cw: .long 0
full_cw: .long 0
cw: .long 0
pushed_cw: .long 0
.globl C(ceil_cw), C(single_cw), C(full_cw), C(cw), C(pushed_cw)
C(ceil_cw): .long 0
C(single_cw): .long 0
C(full_cw): .long 0
C(cw): .long 0
C(pushed_cw): .long 0
// Under PIC each symbol is also tagged as a 4-byte data object.
#ifdef PIC
.type ceil_cw,@object
.type single_cw,@object
.type full_cw,@object
.type cw,@object
.type pushed_cw,@object
.size ceil_cw,4
.size single_cw,4
.size full_cw,4
.size cw,4
.size pushed_cw,4
.type C(ceil_cw),@object
.type C(single_cw),@object
.type C(full_cw),@object
.type C(cw),@object
.type C(pushed_cw),@object
.size C(ceil_cw),4
.size C(single_cw),4
.size C(full_cw),4
.size C(cw),4
.size C(pushed_cw),4
#endif
.text
// R_LowFPPrecision: load the x87 control word from single_cw (the value
// R_SetFPCW stores for "chop mode, single precision"). No arguments, no
// return value.
// F_BEGIN/F_END, got_base and got_var are macros defined outside this view --
// presumably function framing and PIC-aware variable addressing; confirm.
// NOTE(review): this text is a rendered diff; the bare-named fldcw is the
// pre-change line and the C(...) fldcw is its replacement.
F_BEGIN(R_LowFPPrecision)
got_base(3)
fldcw got_var(single_cw)
fldcw got_var(C(single_cw))
ret
F_END(R_LowFPPrecision)
@ -128,7 +128,7 @@ F_END(R_LowFPPrecision)
// R_HighFPPrecision: load the x87 control word from full_cw (the value
// R_SetFPCW stores for "round mode, 64-bit precision"). No arguments, no
// return value.
// F_BEGIN/F_END, got_base and got_var are macros defined outside this view --
// presumably function framing and PIC-aware variable addressing; confirm.
// NOTE(review): this text is a rendered diff; the bare-named fldcw is the
// pre-change line and the C(...) fldcw is its replacement.
F_BEGIN(R_HighFPPrecision)
got_base(4)
fldcw got_var(full_cw)
fldcw got_var(C(full_cw))
ret
F_END(R_HighFPPrecision)
@ -136,19 +136,19 @@ F_END(R_HighFPPrecision)
// R_SetFPCW: snapshot the current x87 control word into cw, then derive three
// variants from it and store them in full_cw, single_cw and ceil_cw.
// Each variant is built by clearing the low nibble of %ah (control-word bits
// 8-11, the precision-control and rounding-control fields) and OR-ing in the
// wanted field values; all other bits are carried over from the snapshot.
// Clobbers %eax and flags. The FPU's active control word is not changed here.
// F_BEGIN/F_END, got_base and got_var are macros defined outside this view --
// presumably function framing and PIC-aware variable addressing; confirm.
// NOTE(review): this text is a rendered diff; each bare-named line is the
// pre-change form and the C(...) line after it is its replacement.
F_BEGIN(R_SetFPCW)
got_base(7)
fnstcw got_var(cw)
movl got_var(cw),%eax
fnstcw got_var(C(cw))
movl got_var(C(cw)),%eax
// 0x03 in bits 8-11: PC = 11b, RC = 00b.
andb $0xF0,%ah
orb $0x03,%ah // round mode, 64-bit precision
movl %eax,got_var(full_cw)
movl %eax,got_var(C(full_cw))
// 0x0C in bits 8-11: PC = 00b, RC = 11b.
andb $0xF0,%ah
orb $0x0C,%ah // chop mode, single precision
movl %eax,got_var(single_cw)
movl %eax,got_var(C(single_cw))
// 0x08 in bits 8-11: PC = 00b, RC = 10b.
andb $0xF0,%ah
orb $0x08,%ah // ceil mode, single precision
movl %eax,got_var(ceil_cw)
movl %eax,got_var(C(ceil_cw))
ret
F_END(R_SetFPCW)

View file

@ -68,11 +68,11 @@ C(R_Alias_clip_bottom):
popl %eax
addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic1],%eax
movl float_point5@GOTOFF(%eax),%edx
movl C(float_point5)@GOTOFF(%eax),%edx
movl C(r_refdef)@GOT(%eax),%eax
#else
leal C(r_refdef),%eax
movl float_point5,%edx
movl C(float_point5),%edx
#endif
movl rd_aliasvrectbottom(%eax),%eax
movl %edx,point5(%esp)
@ -209,11 +209,11 @@ C(R_Alias_clip_top):
popl %eax
addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic2],%eax
movl float_point5@GOTOFF(%eax),%edx
movl C(float_point5)@GOTOFF(%eax),%edx
movl C(r_refdef)@GOT(%eax),%eax
#else
leal C(r_refdef),%eax
movl float_point5,%edx
movl C(float_point5),%edx
#endif
movl rd_aliasvrect+4(%eax),%eax
movl %edx,point5(%esp)
@ -245,11 +245,11 @@ C(R_Alias_clip_right):
popl %eax
addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic3],%eax
movl float_point5@GOTOFF(%eax),%edx
movl C(float_point5)@GOTOFF(%eax),%edx
movl C(r_refdef)@GOT(%eax),%eax
#else
leal C(r_refdef),%eax
movl float_point5,%edx
movl C(float_point5),%edx
#endif
movl rd_aliasvrectright(%eax),%eax
movl %edx,point5(%esp)
@ -300,11 +300,11 @@ C(R_Alias_clip_left):
popl %eax
addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic4],%eax
movl float_point5@GOTOFF(%eax),%edx
movl C(float_point5)@GOTOFF(%eax),%edx
movl C(r_refdef)@GOT(%eax),%eax
#else
leal C(r_refdef),%eax
movl float_point5,%edx
movl C(float_point5),%edx
#endif
movl rd_aliasvrect+0(%eax),%eax
movl %edx,point5(%esp)

View file

@ -153,7 +153,7 @@ Lemit:
//
// FIXME: do away with by manually extracting integers from floats?
// FIXME: set less often
fldcw r_ceil_cw
fldcw C(r_ceil_cw)
// edge_t *edge, *pcheck;
// int u_check;
@ -220,7 +220,7 @@ LCalcSecond:
// r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1);
fistl C(r_ceilv1)
fldcw r_single_cw // put back normal floating-point state
fldcw C(r_single_cw) // put back normal floating-point state
fsts C(r_v1)
fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0
@ -398,11 +398,11 @@ LSideDone:
// edge->u_step = u_step*0x100000;
// edge->u = u*0x100000 + 0xFFFFF;
fmuls fp_1m // u*0x100000 | ustep
fmuls C(fp_1m) // u*0x100000 | ustep
fxch %st(1) // ustep | u*0x100000
fmuls fp_1m // ustep*0x100000 | u*0x100000
fmuls C(fp_1m) // ustep*0x100000 | u*0x100000
fxch %st(1) // u*0x100000 | ustep*0x100000
fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000
fadds C(fp_1m_minus_1) // u*0x100000 + 0xFFFFF | ustep*0x100000
fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF
fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF
fistpl et_u(%edi)
@ -784,7 +784,7 @@ LTransformAndProject:
LNoClip:
fdivrs float_1 // lzi0 | x | y
fdivrs C(float_1) // lzi0 | x | y
fxch %st(1) // x | lzi0 | y
// // FIXME: build x/yscale into transform?

View file

@ -41,43 +41,43 @@
//-------------------------------------------------------
// ASM-only variables
//-------------------------------------------------------
// Exported read-only style constants and small state cells for the assembly
// renderer. ".single" entries are IEEE-754 single-precision values; the
// ".long" entries in the fp_/Float groups are raw single-precision bit
// patterns.
// NOTE(review): this text is a rendered diff; bare-named lines are the
// pre-change form and the C(...) lines their replacements.
.globl float_1, float_particle_z_clip, float_point5
.globl float_minus_1, float_0
float_0: .single 0.0
float_1: .single 1.0
float_minus_1: .single -1.0
float_particle_z_clip: .single PARTICLE_Z_CLIP
float_point5: .single 0.5
.globl C(float_1), C(float_particle_z_clip), C(float_point5)
.globl C(float_minus_1), C(float_0)
C(float_0): .single 0.0
C(float_1): .single 1.0
C(float_minus_1): .single -1.0
C(float_particle_z_clip): .single PARTICLE_Z_CLIP
C(float_point5): .single 0.5
// fp_1m is 2^20 (0x100000) and fp_1m_minus_1 is 2^20 - 1 (0xFFFFF).
// 0x4f000000 is the single-precision encoding of 2^31.
.globl fp_16, fp_64k, fp_1m, fp_64kx64k
.globl fp_1m_minus_1
.globl fp_8
fp_1m: .single 1048576.0
fp_1m_minus_1: .single 1048575.0
fp_64k: .single 65536.0
fp_8: .single 8.0
fp_16: .single 16.0
fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000
.globl C(fp_16), C(fp_64k), C(fp_1m), C(fp_64kx64k)
.globl C(fp_1m_minus_1)
.globl C(fp_8)
C(fp_1m): .single 1048576.0
C(fp_1m_minus_1): .single 1048575.0
C(fp_64k): .single 65536.0
C(fp_8): .single 8.0
C(fp_16): .single 16.0
C(fp_64kx64k): .long 0x4f000000 // (float)0x8000*0x10000
// Bit patterns: 0 = +0.0, 0x4f000000 = +2^31, 0xcf000000 = -2^31.
.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd
FloatZero: .long 0
Float2ToThe31nd: .long 0x4f000000
FloatMinus2ToThe31nd: .long 0xcf000000
.globl C(FloatZero), C(Float2ToThe31nd), C(FloatMinus2ToThe31nd)
C(FloatZero): .long 0
C(Float2ToThe31nd): .long 0x4f000000
C(FloatMinus2ToThe31nd): .long 0xcf000000
// Renderer-side copies of the control words; R_InitVars fills them from
// ceil_cw and single_cw.
.globl r_ceil_cw, r_single_cw
r_ceil_cw: .long 0
r_single_cw: .long 0
.globl C(r_ceil_cw), C(r_single_cw)
C(r_ceil_cw): .long 0
C(r_single_cw): .long 0
.globl C(r_bmodelactive)
C(r_bmodelactive): .long 0
// R_InitVars: copy ceil_cw into r_ceil_cw and single_cw into r_single_cw.
// Takes no arguments and returns nothing; overwrites %eax and %edx.
// NOTE(review): this text is a rendered diff; the four bare-named movl lines
// are the pre-change form and the four C(...) movl lines their replacements.
.global C(R_InitVars)
C(R_InitVars):
movl ceil_cw, %eax
movl single_cw, %edx
movl %eax, r_ceil_cw
movl %edx, r_single_cw
movl C(ceil_cw), %eax
movl C(single_cw), %edx
movl %eax, C(r_ceil_cw)
movl %edx, C(r_single_cw)
ret
#endif // USE_INTEL_ASM