diff --git a/libs/video/renderer/sw/d_draw.S b/libs/video/renderer/sw/d_draw.S index 09864903c..15e33d33c 100644 --- a/libs/video/renderer/sw/d_draw.S +++ b/libs/video/renderer/sw/d_draw.S @@ -112,17 +112,17 @@ C(D_DrawSpans8): // // TODO: any overlap from rearranging? flds C(d_sdivzstepu) - fmuls fp_8 + fmuls C(fp_8) movl C(cacheblock),%edx flds C(d_tdivzstepu) - fmuls fp_8 + fmuls C(fp_8) movl pspans(%esp),%ebx // point to the first span descriptor flds C(d_zistepu) - fmuls fp_8 - movl %edx,pbase // pbase = cacheblock - fstps zi8stepu - fstps tdivz8stepu - fstps sdivz8stepu + fmuls C(fp_8) + movl %edx,C(pbase) // pbase = cacheblock + fstps C(zi8stepu) + fstps C(tdivz8stepu) + fstps C(sdivz8stepu) LSpanLoop: // @@ -175,7 +175,7 @@ LSpanLoop: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z - flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z + flds C(fp_64k) // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + // du*d_zistepu; stays in %st(0) at end @@ -190,7 +190,7 @@ LSpanLoop: // movl C(d_viewbuffer),%ecx movl espan_t_v(%ebx),%eax - movl %ebx,pspantemp // preserve spans pointer + movl %ebx,C(pspantemp) // preserve spans pointer movl C(tadjust),%edx movl C(sadjust),%esi @@ -208,7 +208,7 @@ LSpanLoop: decl %ecx jz LCleanup1 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 + movl %ecx,C(spancountminus1) // finish up the s and t calcs fxch %st(1) // z*64k | 1/z | t/z | s/z @@ -218,10 +218,10 @@ LSpanLoop: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z - fildl spancountminus1 + fildl C(spancountminus1) flds C(d_tdivzstepu) // C(d_tdivzstepu) | spancountminus1 flds C(d_zistepu) // C(d_zistepu) | C(d_tdivzstepu) | spancountminus1 @@ -238,7 +238,7 @@ LSpanLoop: faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 faddp %st(0),%st(3) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // this is what we've gone to all this trouble to // overlap jmp LFDIVInFlight1 @@ -252,8 +252,8 @@ LCleanup1: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z jmp LFDIVInFlight1 .align 4 @@ -266,41 +266,41 @@ LSetupNotLast1: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z - fadds zi8stepu + fadds C(zi8stepu) fxch %st(2) - fadds sdivz8stepu + fadds C(sdivz8stepu) fxch %st(2) - flds tdivz8stepu + flds C(tdivz8stepu) faddp %st(0),%st(2) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // z = 1/1/z // this is what we've gone to all this trouble to // overlap LFDIVInFlight1: - addl s,%esi - addl t,%edx + addl C(s),%esi + addl C(t),%edx movl C(bbextents),%ebx movl C(bbextentt),%ebp cmpl %ebx,%esi ja LClampHighOrLow0 LClampReentry0: - movl %esi,s - movl pbase,%ebx + movl %esi,C(s) + movl C(pbase),%ebx shll $16,%esi cmpl %ebp,%edx - movl %esi,sfracf + movl %esi,C(sfracf) ja LClampHighOrLow1 LClampReentry1: - movl %edx,t - movl s,%esi // sfrac = scans->sfrac; + movl %edx,C(t) + movl C(s),%esi // sfrac = scans->sfrac; shll $16,%edx - movl t,%eax // tfrac = scans->tfrac; + movl C(t),%eax // tfrac = scans->tfrac; sarl $16,%esi - movl %edx,tfracf + movl %edx,C(tfracf) // // calculate the texture starting address @@ -335,15 +335,15 @@ LNotLastSegment: fxch %st(1) fmul %st(3),%st(0) // t = t/z * z fxch %st(1) - fistpl snext - fistpl tnext - movl snext,%eax - movl tnext,%edx + fistpl C(snext) + fistpl C(tnext) + movl C(snext),%eax + movl C(tnext),%edx movb (%esi),%bl // get first source texel subl $8,%ecx // count off this segments' pixels movl C(sadjust),%ebp - movl %ecx,counttemp // remember count of remaining pixels + movl %ecx,C(counttemp) // remember count of remaining pixels movl C(tadjust),%ecx movb %bl,(%edi) // store first dest pixel @@ -366,11 +366,11 @@ LClampReentry2: ja LClampHigh3 LClampReentry3: - movl %ebp,snext - movl %ecx,tnext + movl %ebp,C(snext) + movl %ecx,C(tnext) - subl s,%ebp - subl t,%ecx + subl C(s),%ebp + subl C(t),%ecx // // set up advancetable @@ -392,61 +392,61 @@ LSetUp1: addl %edx,%eax // add in sstep // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%edx - movl %eax,advancetable+4 // advance base in t + movl C(tfracf),%edx + movl %eax,C(advancetable)+4 // advance base in t addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + // (sstep >> 16); shll $13,%ebp // left-justify sstep fractional part - movl sfracf,%ebx + movl C(sfracf),%ebx shll $13,%ecx // left-justify tstep fractional part - movl %eax,advancetable // advance extra in t + movl %eax,C(advancetable) // advance extra in t - movl %ecx,tstep + movl %ecx,C(tstep) addl %ecx,%edx // advance tfrac fractional part by tstep frac sbbl %ecx,%ecx // turn tstep carry into -1 (0 if none) addl %ebp,%ebx // advance sfrac fractional part by sstep frac - adcl advancetable+4(,%ecx,4),%esi // point to next source texel + adcl C(advancetable)+4(,%ecx,4),%esi // point to next source texel - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb (%esi),%al addl %ebp,%ebx movb %al,1(%edi) - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,2(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,3(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi // // start FDIV for end of next segment in flight, so it can overlap // - movl counttemp,%ecx + movl C(counttemp),%ecx cmpl $8,%ecx // more than one segment after this? ja LSetupNotLast2 // yes decl %ecx jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 - fildl spancountminus1 + movl %ecx,C(spancountminus1) + fildl C(spancountminus1) flds C(d_zistepu) // C(d_zistepu) | spancountminus1 fmul %st(1),%st(0) // C(d_zistepu)*scm1 | scm1 @@ -458,7 +458,7 @@ LSetUp1: fmuls C(d_sdivzstepu) // C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1 fxch %st(1) // C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1 faddp %st(0),%st(3) // C(d_sdivzstepu)*scm1 - flds fp_64k // 64k | C(d_sdivzstepu)*scm1 + flds C(fp_64k) // 64k | C(d_sdivzstepu)*scm1 fxch %st(1) // C(d_sdivzstepu)*scm1 | 64k faddp %st(0),%st(4) // 64k @@ -468,49 +468,49 @@ LSetUp1: .align 4 LSetupNotLast2: - fadds zi8stepu + fadds C(zi8stepu) fxch %st(2) - fadds sdivz8stepu + fadds C(sdivz8stepu) fxch %st(2) - flds tdivz8stepu + flds C(tdivz8stepu) faddp %st(0),%st(2) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // z = 1/1/z // this is what we've gone to all this trouble to // overlap LFDIVInFlight2: - movl %ecx,counttemp + movl %ecx,C(counttemp) - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,4(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,5(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,6(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi addl $8,%edi - movl %edx,tfracf - movl snext,%edx - movl %ebx,sfracf - movl tnext,%ebx - movl %edx,s - movl %ebx,t + movl %edx,C(tfracf) + movl C(snext),%edx + movl %ebx,C(sfracf) + movl C(tnext),%ebx + movl %edx,C(s) + movl %ebx,C(t) - movl counttemp,%ecx // retrieve count + movl C(counttemp),%ecx // retrieve count // // determine whether last span or not @@ -540,16 +540,16 @@ LLastSegment: fxch %st(1) fmul %st(3),%st(0) // t = t/z * z fxch %st(1) - fistpl snext - fistpl tnext + fistpl C(snext) + fistpl C(tnext) movb (%esi),%al // load first texel in segment movl C(tadjust),%ebx movb %al,(%edi) // store first pixel in segment movl C(sadjust),%eax - addl snext,%eax - addl tnext,%ebx + addl C(snext),%eax + addl C(tnext),%ebx movl C(bbextents),%ebp movl C(bbextentt),%edx @@ -559,7 +559,7 @@ LLastSegment: cmpl %ebp,%eax ja LClampHigh4 LClampReentry4: - movl %eax,snext + movl %eax,C(snext) cmpl $2048,%ebx jl LClampLow5 @@ -570,25 +570,25 @@ LClampReentry5: cmpl $1,%ecx // don't bother je LOnlyOneStep // if two pixels in segment, there's only one step, // of the segment length - subl s,%eax - subl t,%ebx + subl C(s),%eax + subl C(t),%ebx addl %eax,%eax // convert to 15.17 format so multiply by 1.31 addl %ebx,%ebx // reciprocal yields 16.48 - imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) + imull C(reciprocal_table)-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) movl %edx,%ebp movl %ebx,%eax - imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) + imull C(reciprocal_table)-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) LSetEntryvec: // // set up advancetable // - movl entryvec_table(,%ecx,4),%ebx + movl C(entryvec_table)(,%ecx,4),%ebx movl %edx,%eax - movl %ebx,jumptemp // entry point into code for RET later + movl %ebx,C(jumptemp) // entry point into code for RET later movl %ebp,%ecx sarl $16,%edx // tstep >>= 16; movl C(cachewidth),%ebx @@ -597,23 +597,23 @@ LSetEntryvec: addl %ecx,%edx // add in sstep // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%ecx - movl %edx,advancetable+4 // advance base in t + movl C(tfracf),%ecx + movl %edx,C(advancetable)+4 // advance base in t addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + // (sstep >> 16); shll $16,%ebp // left-justify sstep fractional part - movl sfracf,%ebx + movl C(sfracf),%ebx shll $16,%eax // left-justify tstep fractional part - movl %edx,advancetable // advance extra in t + movl %edx,C(advancetable) // advance extra in t - movl %eax,tstep + movl %eax,C(tstep) movl %ecx,%edx addl %eax,%edx sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - jmp *jumptemp // jump to the number-of-pixels handler + jmp *C(jumptemp) // jump to the number-of-pixels handler //---------------------------------------- @@ -624,128 +624,128 @@ LNoSteps: LOnlyOneStep: - subl s,%eax - subl t,%ebx + subl C(s),%eax + subl C(t),%ebx movl %eax,%ebp movl %ebx,%edx jmp LSetEntryvec //---------------------------------------- -.globl Entry2_8 -Entry2_8: +.globl C(Entry2_8) +C(Entry2_8): subl $6,%edi // adjust for hardwired offsets movb (%esi),%al jmp LLEntry2_8 //---------------------------------------- -.globl Entry3_8 -Entry3_8: +.globl C(Entry3_8) +C(Entry3_8): subl $5,%edi // adjust for hardwired offsets addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi jmp LLEntry3_8 //---------------------------------------- -.globl Entry4_8 -Entry4_8: +.globl C(Entry4_8) +C(Entry4_8): subl $4,%edi // adjust for hardwired offsets addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx jmp LLEntry4_8 //---------------------------------------- -.globl Entry5_8 -Entry5_8: +.globl C(Entry5_8) +C(Entry5_8): subl $3,%edi // adjust for hardwired offsets addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx jmp LLEntry5_8 //---------------------------------------- -.globl Entry6_8 -Entry6_8: +.globl C(Entry6_8) +C(Entry6_8): subl $2,%edi // adjust for hardwired offsets addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx jmp LLEntry6_8 //---------------------------------------- -.globl Entry7_8 -Entry7_8: +.globl C(Entry7_8) +C(Entry7_8): decl %edi // adjust for hardwired offsets addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx jmp LLEntry7_8 //---------------------------------------- -.globl Entry8_8 -Entry8_8: +.globl C(Entry8_8) +C(Entry8_8): addl %eax,%edx movb (%esi),%al sbbl %ecx,%ecx addl %ebp,%ebx - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi - addl tstep,%edx + addl C(tstep),%edx sbbl %ecx,%ecx movb %al,1(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx LLEntry7_8: sbbl %ecx,%ecx movb %al,2(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx LLEntry6_8: sbbl %ecx,%ecx movb %al,3(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx LLEntry5_8: sbbl %ecx,%ecx movb %al,4(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi - addl tstep,%edx + adcl C(advancetable)+4(,%ecx,4),%esi + addl C(tstep),%edx LLEntry4_8: sbbl %ecx,%ecx movb %al,5(%edi) addl %ebp,%ebx movb (%esi),%al - adcl advancetable+4(,%ecx,4),%esi + adcl C(advancetable)+4(,%ecx,4),%esi LLEntry3_8: movb %al,6(%edi) movb (%esi),%al @@ -760,7 +760,7 @@ LEndSpan: fstp %st(0) fstp %st(0) - movl pspantemp,%ebx // restore spans pointer + movl C(pspantemp),%ebx // restore spans pointer movl espan_t_pnext(%ebx),%ebx // point to next span testl %ebx,%ebx // any more spans? movb %al,7(%edi) @@ -811,10 +811,10 @@ C(D_DrawZSpans): testl %eax,%eax jz LFNegSpan - fmuls Float2ToThe31nd - fistpl izistep // note: we are relying on FP exceptions being turned + fmuls C(Float2ToThe31nd) + fistpl C(izistep) // note: we are relying on FP exceptions being turned // off here to avoid range problems - movl izistep,%ebx // remains loaded for all spans + movl C(izistep),%ebx // remains loaded for all spans LFSpanLoop: // set up the initial 1/z value @@ -831,7 +831,7 @@ LFSpanLoop: faddp %st(0),%st(1) // clamp if z is nearer than 2 (1/z > 0.5) - fcoms float_point5 + fcoms C(float_point5) addl %ecx,%edi movl espan_t_u(%esi),%edx addl %edx,%edx // word count @@ -842,11 +842,11 @@ LFSpanLoop: testb $0x45,%ah jz LClamp - fmuls Float2ToThe31nd - fistpl izi // note: we are relying on FP exceptions being turned + fmuls C(Float2ToThe31nd) + fistpl C(izi) // note: we are relying on FP exceptions being turned // off here to avoid problems when the span is closer // than 1/(2**31) - movl izi,%edx + movl C(izi),%edx // at this point: // %ebx = izistep @@ -926,10 +926,10 @@ LFSpanDone: jmp LFDone LFNegSpan: - fmuls FloatMinus2ToThe31nd - fistpl izistep // note: we are relying on FP exceptions being turned + fmuls C(FloatMinus2ToThe31nd) + fistpl C(izistep) // note: we are relying on FP exceptions being turned // off here to avoid range problems - movl izistep,%ebx // remains loaded for all spans + movl C(izistep),%ebx // remains loaded for all spans LFNegSpanLoop: // set up the initial 1/z value @@ -946,7 +946,7 @@ LFNegSpanLoop: faddp %st(0),%st(1) // clamp if z is nearer than 2 (1/z > 0.5) - fcoms float_point5 + fcoms C(float_point5) addl %ecx,%edi movl espan_t_u(%esi),%edx addl %edx,%edx // word count @@ -957,11 +957,11 @@ LFNegSpanLoop: testb $0x45,%ah jz LClampNeg - fmuls Float2ToThe31nd - fistpl izi // note: we are relying on FP exceptions being turned + fmuls C(Float2ToThe31nd) + fistpl C(izi) // note: we are relying on FP exceptions being turned // off here to avoid problems when the span is closer // than 1/(2**31) - movl izi,%edx + movl C(izi),%edx // at this point: // %ebx = izistep diff --git a/libs/video/renderer/sw/d_parta.S b/libs/video/renderer/sw/d_parta.S index 3a98cefe1..fb004638d 100644 --- a/libs/video/renderer/sw/d_parta.S +++ b/libs/video/renderer/sw/d_parta.S @@ -89,12 +89,12 @@ C(D_DrawParticle): faddp %st(0),%st(1) // z | local[0] | local[1] | local[2] fld %st(0) // z | z | local[0] | local[1] | // local[2] - fdivrs float_1 // 1/z | z | local[0] | local[1] | local[2] + fdivrs C(float_1) // 1/z | z | local[0] | local[1] | local[2] fxch %st(1) // z | 1/z | local[0] | local[1] | local[2] // if (transformed[2] < PARTICLE_Z_CLIP) // return; - fcomps float_particle_z_clip // 1/z | local[0] | local[1] | local[2] + fcomps C(float_particle_z_clip) // 1/z | local[0] | local[1] | local[2] fxch %st(3) // local[2] | local[0] | local[1] | 1/z flds C(r_pup) // r_pup[0] | local[2] | local[0] | local[1] | 1/z @@ -141,20 +141,20 @@ C(D_DrawParticle): fadds C(xcenter) // u | v | 1/z // FIXME: preadjust xcenter and ycenter fxch %st(1) // v | u | 1/z - fadds float_point5 // v | u | 1/z + fadds C(float_point5) // v | u | 1/z fxch %st(1) // u | v | 1/z - fadds float_point5 // u | v | 1/z + fadds C(float_point5) // u | v | 1/z fxch %st(2) // 1/z | v | u - fmuls DP_32768 // 1/z * 0x8000 | v | u + fmuls C(DP_32768) // 1/z * 0x8000 | v | u fxch %st(2) // u | v | 1/z * 0x8000 // FIXME: use Terje's fp->int trick here? // FIXME: check we're getting proper rounding here - fistpl DP_u // v | 1/z * 0x8000 - fistpl DP_v // 1/z * 0x8000 + fistpl C(DP_u) // v | 1/z * 0x8000 + fistpl C(DP_v) // 1/z * 0x8000 - movl DP_u,%eax - movl DP_v,%edx + movl C(DP_u),%eax + movl C(DP_v),%edx // if ((v > d_vrectbottom_particle) || // (u > d_vrectright_particle) || @@ -179,7 +179,7 @@ C(D_DrawParticle): jl LPop1AndDone flds pt_color(%edi) // color | 1/z * 0x8000 - fstps DP_Color // 1/z * 0x8000 + fstps C(DP_Color) // 1/z * 0x8000 movl C(d_viewbuffer),%ebx @@ -191,17 +191,17 @@ C(D_DrawParticle): leal (%edx,%eax,2),%edx movl C(d_pzbuffer),%eax - fistpl izi + fistpl C(izi) addl %ebx,%edi addl %eax,%edx // pix = izi >> d_pix_shift; - movl izi,%eax + movl C(izi),%eax movl C(d_pix_shift),%ecx shrl %cl,%eax - movl izi,%ebp + movl C(izi),%ebp // if (pix < d_pix_min) // pix = d_pix_min; @@ -221,7 +221,7 @@ LTestPixMax: movl %ecx,%eax LTestDone: - movb DP_Color,%ch + movb C(DP_Color),%ch movl C(d_y_aspect_shift),%ebx testl %ebx,%ebx @@ -230,11 +230,11 @@ LTestDone: cmpl $4,%eax ja LDefault - jmp *DP_EntryTable-4(,%eax,4) + jmp *C(DP_EntryTable)-4(,%eax,4) // 1x1 -.globl DP_1x1 -DP_1x1: +.globl C(DP_1x1) +C(DP_1x1): cmpw %bp,(%edx) // just one pixel to do jg LDone movw %bp,(%edx) @@ -242,8 +242,8 @@ DP_1x1: jmp LDone // 2x2 -.globl DP_2x2 -DP_2x2: +.globl C(DP_2x2) +C(DP_2x2): pushl %esi movl C(screenwidth),%ebx movl C(d_zrowbytes),%esi @@ -273,8 +273,8 @@ L2x2_4: jmp LDone // 3x3 -.globl DP_3x3 -DP_3x3: +.globl C(DP_3x3) +C(DP_3x3): pushl %esi movl C(screenwidth),%ebx movl C(d_zrowbytes),%esi @@ -332,8 +332,8 @@ L3x3_9: // 4x4 -.globl DP_4x4 -DP_4x4: +.globl C(DP_4x4) +C(DP_4x4): pushl %esi movl C(screenwidth),%ebx movl C(d_zrowbytes),%esi @@ -434,7 +434,7 @@ LDefault: // count = pix << d_y_aspect_shift; movl %eax,%ebx - movl %eax,DP_Pix + movl %eax,C(DP_Pix) movb C(d_y_aspect_shift),%cl shll %cl,%ebx @@ -451,7 +451,7 @@ LDefault: // } LGenRowLoop: - movl DP_Pix,%eax + movl C(DP_Pix),%eax LGenColLoop: cmpw %bp,-2(%edx,%eax,2) diff --git a/libs/video/renderer/sw/d_polysa.S b/libs/video/renderer/sw/d_polysa.S index 20f090c72..8ea71258c 100644 --- a/libs/video/renderer/sw/d_polysa.S +++ b/libs/video/renderer/sw/d_polysa.S @@ -183,7 +183,7 @@ C(D_PolysetCalcGradients): // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fmuls float_minus_1 // ystepdenominv | + fmuls C(float_minus_1) // ystepdenominv | // t1*p00_minus_p20 - t0*p10_minus_p20 | // t1*p01_minus_p21 - t0*p11_minus_p21 | // xstepdenominv | p00_minus_p20 | p11_minus_p21 @@ -205,12 +205,12 @@ C(D_PolysetCalcGradients): // (t1*p01_minus_p21 - t0*p11_minus_p21)* // xstepdenominv | ystepdenominv | // xstepdenominv | p00_minus_p20 | p11_minus_p21 - fldcw r_ceil_cw + fldcw C(r_ceil_cw) fistpl C(r_lstepy) // r_lstepx | ystepdenominv | xstepdenominv | // p00_minus_p20 | p11_minus_p21 fistpl C(r_lstepx) // ystepdenominv | xstepdenominv | p00_minus_p20 | // p11_minus_p21 - fldcw r_single_cw + fldcw C(r_single_cw) // t0 = r_p0[2] - r_p2[2]; // t1 = r_p1[2] - r_p2[2]; @@ -822,16 +822,16 @@ LRightEdgeStepped: movl C(a_ststepxwhole),%ecx movl C(r_affinetridesc)+atd_skinwidth,%edx - movl %ecx,advancetable+4 // advance base in t + movl %ecx,C(advancetable)+4 // advance base in t addl %edx,%ecx - movl %ecx,advancetable // advance extra in t + movl %ecx,C(advancetable) // advance extra in t movl C(a_tstepxfrac),%ecx movw C(r_lstepx),%cx movl %eax,%edx // count - movl %ecx,tstep + movl %ecx,C(tstep) addl $7,%edx shrl $3,%edx // count of full and partial loops @@ -889,12 +889,12 @@ LDraw8: LPatch8: movb %al,(%edi) Lp1: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw7: cmpw 2(%ecx),%bp @@ -907,12 +907,12 @@ LDraw7: LPatch7: movb %al,1(%edi) Lp2: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw6: cmpw 4(%ecx),%bp @@ -925,12 +925,12 @@ LDraw6: LPatch6: movb %al,2(%edi) Lp3: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw5: cmpw 6(%ecx),%bp @@ -943,12 +943,12 @@ LDraw5: LPatch5: movb %al,3(%edi) Lp4: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw4: cmpw 8(%ecx),%bp @@ -961,12 +961,12 @@ LDraw4: LPatch4: movb %al,4(%edi) Lp5: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw3: cmpw 10(%ecx),%bp @@ -979,12 +979,12 @@ LDraw3: LPatch3: movb %al,5(%edi) Lp6: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw2: cmpw 12(%ecx),%bp @@ -997,12 +997,12 @@ LDraw2: LPatch2: movb %al,6(%edi) Lp7: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi LDraw1: cmpw 14(%ecx),%bp @@ -1015,12 +1015,12 @@ LDraw1: LPatch1: movb %al,7(%edi) Lp8: - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax addl lzistepx,%ebp adcl $0,%ebp addl C(a_sstepxfrac),%ebx - adcl advancetable+4(,%eax,4),%esi + adcl C(advancetable)+4(,%eax,4),%esi addl $8,%edi addl $16,%ecx @@ -1652,7 +1652,7 @@ LNDLoop: movl %eax,C(r_p0)+16 movl %esi,C(r_p0)+20 - fdivrs float_1 + fdivrs C(float_1) // r_p1[0] = index1->v[0]; // r_p1[1] = index1->v[1]; diff --git a/libs/video/renderer/sw/d_spr8.S b/libs/video/renderer/sw/d_spr8.S index 70abfc22c..5739f7ee1 100644 --- a/libs/video/renderer/sw/d_spr8.S +++ b/libs/video/renderer/sw/d_spr8.S @@ -108,25 +108,25 @@ C(D_SpriteDrawSpans): // // FIXME: any overlap from rearranging? flds C(d_sdivzstepu) - fmuls fp_8 + fmuls C(fp_8) movl C(cacheblock),%edx flds C(d_tdivzstepu) - fmuls fp_8 + fmuls C(fp_8) movl pspans(%esp),%ebx // point to the first span descriptor flds C(d_zistepu) - fmuls fp_8 - movl %edx,pbase // pbase = cacheblock + fmuls C(fp_8) + movl %edx,C(pbase) // pbase = cacheblock flds C(d_zistepu) - fmuls fp_64kx64k + fmuls C(fp_64kx64k) fxch %st(3) - fstps sdivz8stepu - fstps zi8stepu - fstps tdivz8stepu - fistpl izistep - movl izistep,%eax + fstps C(sdivz8stepu) + fstps C(zi8stepu) + fstps C(tdivz8stepu) + fistpl C(izistep) + movl C(izistep),%eax rorl $16,%eax // put upper 16 bits in low word movl sspan_t_count(%ebx),%ecx - movl %eax,izistep + movl %eax,C(izistep) cmpl $0,%ecx jle LNextSpan @@ -183,14 +183,14 @@ LSpanLoop: fxch %st(2) // dv*d_zistepv | du*d_zistepu | t/z | s/z faddp %st(0),%st(1) // dv*d_zistepv + du*d_zistepu | t/z | s/z - flds fp_64k // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z + flds C(fp_64k) // fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z fxch %st(1) // dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z fadds C(d_ziorigin) // zi = d_ziorigin + dv*d_zistepv + // du*d_zistepu; stays in %st(0) at end // 1/z | fp_64k | t/z | s/z fld %st(0) // FIXME: get rid of stall on FMUL? - fmuls fp_64kx64k + fmuls C(fp_64kx64k) fxch %st(1) // @@ -199,21 +199,21 @@ LSpanLoop: fdivr %st(0),%st(2) // 1/z | z*64k | t/z | s/z fxch %st(1) - fistpl izi // 0.32 fixed-point 1/z - movl izi,%ebp + fistpl C(izi) // 0.32 fixed-point 1/z + movl C(izi),%ebp // // set pz to point to the first z-buffer pixel in the span // rorl $16,%ebp // put upper 16 bits in low word movl sspan_t_v(%ebx),%eax - movl %ebp,izi + movl %ebp,C(izi) movl sspan_t_u(%ebx),%ebp imull C(d_zrowbytes) shll $1,%ebp // a word per pixel addl C(d_pzbuffer),%eax addl %ebp,%eax - movl %eax,pz + movl %eax,C(pz) // // point %edi to the first pixel in the span @@ -236,7 +236,7 @@ LSpanLoop: decl %ecx jz LCleanup1 // if only one pixel, no need to start an FDIV - movl %ecx,spancountminus1 + movl %ecx,C(spancountminus1) // finish up the s and t calcs fxch %st(1) // z*64k | 1/z | t/z | s/z @@ -246,10 +246,10 @@ LSpanLoop: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z - fildl spancountminus1 + fildl C(spancountminus1) flds C(d_tdivzstepu) // _d_tdivzstepu | spancountminus1 flds C(d_zistepu) // _d_zistepu | _d_tdivzstepu | spancountminus1 @@ -266,7 +266,7 @@ LSpanLoop: faddp %st(0),%st(3) // _d_sdivzstepu*scm1 faddp %st(0),%st(3) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // this is what we've gone to all this trouble to // overlap jmp LFDIVInFlight1 @@ -280,8 +280,8 @@ LCleanup1: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z jmp LFDIVInFlight1 .align 4 @@ -294,41 +294,41 @@ LSetupNotLast1: fxch %st(1) // z*64k | s | 1/z | t/z | s/z fmul %st(3),%st(0) // t | s | 1/z | t/z | s/z fxch %st(1) // s | t | 1/z | t/z | s/z - fistpl s // 1/z | t | t/z | s/z - fistpl t // 1/z | t/z | s/z + fistpl C(s) // 1/z | t | t/z | s/z + fistpl C(t) // 1/z | t/z | s/z - fadds zi8stepu + fadds C(zi8stepu) fxch %st(2) - fadds sdivz8stepu + fadds C(sdivz8stepu) fxch %st(2) - flds tdivz8stepu + flds C(tdivz8stepu) faddp %st(0),%st(2) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // z = 1/1/z // this is what we've gone to all this trouble to // overlap LFDIVInFlight1: - addl s,%esi - addl t,%edx + addl C(s),%esi + addl C(t),%edx movl C(bbextents),%ebx movl C(bbextentt),%ebp cmpl %ebx,%esi ja LClampHighOrLow0 LClampReentry0: - movl %esi,s - movl pbase,%ebx + movl %esi,C(s) + movl C(pbase),%ebx shll $16,%esi cmpl %ebp,%edx - movl %esi,sfracf + movl %esi,C(sfracf) ja LClampHighOrLow1 LClampReentry1: - movl %edx,t - movl s,%esi // sfrac = scans->sfrac; + movl %edx,C(t) + movl C(s),%esi // sfrac = scans->sfrac; shll $16,%edx - movl t,%eax // tfrac = scans->tfrac; + movl C(t),%eax // tfrac = scans->tfrac; sarl $16,%esi - movl %edx,tfracf + movl %edx,C(tfracf) // // calculate the texture starting address @@ -362,10 +362,10 @@ LNotLastSegment: fxch %st(1) fmul %st(3),%st(0) // t = t/z * z fxch %st(1) - fistpl snext - fistpl tnext - movl snext,%eax - movl tnext,%edx + fistpl C(snext) + fistpl C(tnext) + movl C(snext),%eax + movl C(tnext),%edx subl $8,%ecx // count off this segments' pixels movl C(sadjust),%ebp @@ -390,11 +390,11 @@ LClampReentry2: ja LClampHigh3 LClampReentry3: - movl %ebp,snext - movl %ecx,tnext + movl %ebp,C(snext) + movl %ecx,C(tnext) - subl s,%ebp - subl t,%ecx + subl C(s),%ebp + subl C(t),%ecx // // set up advancetable @@ -409,19 +409,19 @@ LClampReentry3: LIsZero: addl %edx,%eax // add in sstep // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%edx - movl %eax,advancetable+4 // advance base in t + movl C(tfracf),%edx + movl %eax,C(advancetable)+4 // advance base in t addl %ebx,%eax // ((tstep >> 16) + 1) * cachewidth + // (sstep >> 16); shll $13,%ebp // left-justify sstep fractional part - movl %ebp,sstep - movl sfracf,%ebx + movl %ebp,C(sstep) + movl C(sfracf),%ebx shll $13,%ecx // left-justify tstep fractional part - movl %eax,advancetable // advance extra in t - movl %ecx,tstep + movl %eax,C(advancetable) // advance extra in t + movl %ecx,C(tstep) - movl pz,%ecx - movl izi,%ebp + movl C(pz),%ecx + movl C(izi),%ebp cmpw (%ecx),%bp jl Lp1 @@ -431,13 +431,13 @@ LIsZero: movw %bp,(%ecx) movb %al,(%edi) // store first dest pixel Lp1: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx // advance tfrac fractional part by tstep frac + addl C(tstep),%edx // advance tfrac fractional part by tstep frac sbbl %eax,%eax // turn tstep carry into -1 (0 if none) - addl sstep,%ebx // advance sfrac fractional part by sstep frac - adcl advancetable+4(,%eax,4),%esi // point to next source texel + addl C(sstep),%ebx // advance sfrac fractional part by sstep frac + adcl C(advancetable)+4(,%eax,4),%esi // point to next source texel cmpw 2(%ecx),%bp jl Lp2 @@ -447,12 +447,12 @@ Lp1: movw %bp,2(%ecx) movb %al,1(%edi) Lp2: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi cmpw 4(%ecx),%bp jl Lp3 @@ -462,12 +462,12 @@ Lp2: movw %bp,4(%ecx) movb %al,2(%edi) Lp3: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi cmpw 6(%ecx),%bp jl Lp4 @@ -477,12 +477,12 @@ Lp3: movw %bp,6(%ecx) movb %al,3(%edi) Lp4: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi cmpw 8(%ecx),%bp jl Lp5 @@ -492,12 +492,12 @@ Lp4: movw %bp,8(%ecx) movb %al,4(%edi) Lp5: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi // // start FDIV for end of next segment in flight, so it can overlap @@ -508,8 +508,8 @@ Lp5: decl %eax jz LFDIVInFlight2 // if only one pixel, no need to start an FDIV - movl %eax,spancountminus1 - fildl spancountminus1 + movl %eax,C(spancountminus1) + fildl C(spancountminus1) flds C(d_zistepu) // _d_zistepu | spancountminus1 fmul %st(1),%st(0) // _d_zistepu*scm1 | scm1 @@ -521,7 +521,7 @@ Lp5: fmuls C(d_sdivzstepu) // _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1 fxch %st(1) // _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1 faddp %st(0),%st(3) // _d_sdivzstepu*scm1 - flds fp_64k // 64k | _d_sdivzstepu*scm1 + flds C(fp_64k) // 64k | _d_sdivzstepu*scm1 fxch %st(1) // _d_sdivzstepu*scm1 | 64k faddp %st(0),%st(4) // 64k @@ -531,13 +531,13 @@ Lp5: .align 4 LSetupNotLast2: - fadds zi8stepu + fadds C(zi8stepu) fxch %st(2) - fadds sdivz8stepu + fadds C(sdivz8stepu) fxch %st(2) - flds tdivz8stepu + flds C(tdivz8stepu) faddp %st(0),%st(2) - flds fp_64k + flds C(fp_64k) fdiv %st(1),%st(0) // z = 1/1/z // this is what we've gone to all this trouble to // overlap @@ -552,12 +552,12 @@ LFDIVInFlight2: movw %bp,10(%ecx) movb %al,5(%edi) Lp6: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi cmpw 12(%ecx),%bp jl Lp7 @@ -567,12 +567,12 @@ Lp6: movw %bp,12(%ecx) movb %al,6(%edi) Lp7: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi cmpw 14(%ecx),%bp jl Lp8 @@ -582,24 +582,24 @@ Lp7: movw %bp,14(%ecx) movb %al,7(%edi) Lp8: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi addl $8,%edi addl $16,%ecx - movl %edx,tfracf - movl snext,%edx - movl %ebx,sfracf - movl tnext,%ebx - movl %edx,s - movl %ebx,t + movl %edx,C(tfracf) + movl C(snext),%edx + movl %ebx,C(sfracf) + movl C(tnext),%ebx + movl %edx,C(s) + movl %ebx,C(t) - movl %ecx,pz - movl %ebp,izi + movl %ecx,C(pz) + movl %ebp,C(izi) popl %ecx // retrieve count @@ -630,14 +630,14 @@ LLastSegment: fxch %st(1) fmul %st(3),%st(0) // t = t/z * z fxch %st(1) - fistpl snext - fistpl tnext + fistpl C(snext) + fistpl C(tnext) movl C(tadjust),%ebx movl C(sadjust),%eax - addl snext,%eax - addl tnext,%ebx + addl C(snext),%eax + addl C(tnext),%ebx movl C(bbextents),%ebp movl C(bbextentt),%edx @@ -647,7 +647,7 @@ LLastSegment: cmpl %ebp,%eax ja LClampHigh4 LClampReentry4: - movl %eax,snext + movl %eax,C(snext) cmpl $2048,%ebx jl LClampLow5 @@ -658,22 +658,22 @@ LClampReentry5: cmpl $1,%ecx // don't bother je LOnlyOneStep // if two pixels in segment, there's only one step, // of the segment length - subl s,%eax - subl t,%ebx + subl C(s),%eax + subl C(t),%ebx addl %eax,%eax // convert to 15.17 format so multiply by 1.31 addl %ebx,%ebx // reciprocal yields 16.48 - imull reciprocal_table-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) + imull C(reciprocal_table)-8(,%ecx,4) // sstep = (snext - s) / (spancount-1) movl %edx,%ebp movl %ebx,%eax - imull reciprocal_table-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) + imull C(reciprocal_table)-8(,%ecx,4) // tstep = (tnext - t) / (spancount-1) LSetEntryvec: // // set up advancetable // - movl spr8entryvec_table(,%ecx,4),%ebx + movl C(spr8entryvec_table)(,%ecx,4),%ebx movl %edx,%eax pushl %ebx // entry point into code for RET later movl %ebp,%ecx @@ -685,44 +685,44 @@ LSetEntryvec: LIsZeroLast: addl %ecx,%edx // add in sstep // (tstep >> 16) * cachewidth + (sstep >> 16); - movl tfracf,%ecx - movl %edx,advancetable+4 // advance base in t + movl C(tfracf),%ecx + movl %edx,C(advancetable)+4 // advance base in t addl %ebx,%edx // ((tstep >> 16) + 1) * cachewidth + // (sstep >> 16); shll $16,%ebp // left-justify sstep fractional part - movl sfracf,%ebx + movl C(sfracf),%ebx shll $16,%eax // left-justify tstep fractional part - movl %edx,advancetable // advance extra in t + movl %edx,C(advancetable) // advance extra in t - movl %eax,tstep - movl %ebp,sstep + movl %eax,C(tstep) + movl %ebp,C(sstep) movl %ecx,%edx - movl pz,%ecx - movl izi,%ebp + movl C(pz),%ecx + movl C(izi),%ebp ret // jump to the number-of-pixels handler //---------------------------------------- LNoSteps: - movl pz,%ecx + movl C(pz),%ecx subl $7,%edi // adjust for hardwired offset subl $14,%ecx jmp LEndSpan LOnlyOneStep: - subl s,%eax - subl t,%ebx + subl C(s),%eax + subl C(t),%ebx movl %eax,%ebp movl %ebx,%edx jmp LSetEntryvec //---------------------------------------- -.globl Spr8Entry2_8 -Spr8Entry2_8: +.globl C(Spr8Entry2_8) +C(Spr8Entry2_8): subl $6,%edi // adjust for hardwired offsets subl $12,%ecx movb (%esi),%al @@ -730,48 +730,48 @@ Spr8Entry2_8: //---------------------------------------- -.globl Spr8Entry3_8 -Spr8Entry3_8: +.globl C(Spr8Entry3_8) +C(Spr8Entry3_8): subl $5,%edi // adjust for hardwired offsets subl $10,%ecx jmp LLEntry3_8 //---------------------------------------- -.globl Spr8Entry4_8 -Spr8Entry4_8: +.globl C(Spr8Entry4_8) +C(Spr8Entry4_8): subl $4,%edi // adjust for hardwired offsets subl $8,%ecx jmp LLEntry4_8 //---------------------------------------- -.globl Spr8Entry5_8 -Spr8Entry5_8: +.globl C(Spr8Entry5_8) +C(Spr8Entry5_8): subl $3,%edi // adjust for hardwired offsets subl $6,%ecx jmp LLEntry5_8 //---------------------------------------- -.globl Spr8Entry6_8 -Spr8Entry6_8: +.globl C(Spr8Entry6_8) +C(Spr8Entry6_8): subl $2,%edi // adjust for hardwired offsets subl $4,%ecx jmp LLEntry6_8 //---------------------------------------- -.globl Spr8Entry7_8 -Spr8Entry7_8: +.globl C(Spr8Entry7_8) +C(Spr8Entry7_8): decl %edi // adjust for hardwired offsets subl $2,%ecx jmp LLEntry7_8 //---------------------------------------- -.globl Spr8Entry8_8 -Spr8Entry8_8: +.globl C(Spr8Entry8_8) +C(Spr8Entry8_8): cmpw (%ecx),%bp jl Lp9 movb (%esi),%al @@ -780,12 +780,12 @@ Spr8Entry8_8: movw %bp,(%ecx) movb %al,(%edi) Lp9: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry7_8: cmpw 2(%ecx),%bp jl Lp10 @@ -795,12 +795,12 @@ LLEntry7_8: movw %bp,2(%ecx) movb %al,1(%edi) Lp10: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry6_8: cmpw 4(%ecx),%bp jl Lp11 @@ -810,12 +810,12 @@ LLEntry6_8: movw %bp,4(%ecx) movb %al,2(%edi) Lp11: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry5_8: cmpw 6(%ecx),%bp jl Lp12 @@ -825,12 +825,12 @@ LLEntry5_8: movw %bp,6(%ecx) movb %al,3(%edi) Lp12: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry4_8: cmpw 8(%ecx),%bp jl Lp13 @@ -840,12 +840,12 @@ LLEntry4_8: movw %bp,8(%ecx) movb %al,4(%edi) Lp13: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry3_8: cmpw 10(%ecx),%bp jl Lp14 @@ -855,12 +855,12 @@ LLEntry3_8: movw %bp,10(%ecx) movb %al,5(%edi) Lp14: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LLEntry2_8: cmpw 12(%ecx),%bp jl Lp15 @@ -870,12 +870,12 @@ LLEntry2_8: movw %bp,12(%ecx) movb %al,6(%edi) Lp15: - addl izistep,%ebp + addl C(izistep),%ebp adcl $0,%ebp - addl tstep,%edx + addl C(tstep),%edx sbbl %eax,%eax - addl sstep,%ebx - adcl advancetable+4(,%eax,4),%esi + addl C(sstep),%ebx + adcl C(advancetable)+4(,%eax,4),%esi LEndSpan: cmpw 14(%ecx),%bp diff --git a/libs/video/renderer/sw/d_varsa.S b/libs/video/renderer/sw/d_varsa.S index d6ac3504b..ece5dc2c1 100644 --- a/libs/video/renderer/sw/d_varsa.S +++ b/libs/video/renderer/sw/d_varsa.S @@ -95,39 +95,39 @@ C(d_zwidth): .long 0 //------------------------------------------------------- // ASM-only variables //------------------------------------------------------- -.globl izi -izi: .long 0 +.globl C(izi) +C(izi): .long 0 -.globl pbase, s, t, sfracf, tfracf, snext, tnext -.globl spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu -.globl zi8stepu, sdivz8stepu, tdivz8stepu, pz -s: .long 0 -t: .long 0 -snext: .long 0 -tnext: .long 0 -sfracf: .long 0 -tfracf: .long 0 -pbase: .long 0 -zi8stepu: .long 0 -sdivz8stepu: .long 0 -tdivz8stepu: .long 0 -zi16stepu: .long 0 -sdivz16stepu: .long 0 -tdivz16stepu: .long 0 -spancountminus1: .long 0 -pz: .long 0 +.globl C(pbase), C(s), C(t), C(sfracf), C(tfracf), C(snext), C(tnext) +.globl C(spancountminus1), C(zi16stepu), C(sdivz16stepu), C(tdivz16stepu) +.globl C(zi8stepu), C(sdivz8stepu), C(tdivz8stepu), C(pz) +C(s): .long 0 +C(t): .long 0 +C(snext): .long 0 +C(tnext): .long 0 +C(sfracf): .long 0 +C(tfracf): .long 0 +C(pbase): .long 0 +C(zi8stepu): .long 0 +C(sdivz8stepu): .long 0 +C(tdivz8stepu): .long 0 +C(zi16stepu): .long 0 +C(sdivz16stepu): .long 0 +C(tdivz16stepu): .long 0 +C(spancountminus1): .long 0 +C(pz): .long 0 -.globl izistep -izistep: .long 0 +.globl C(izistep) +C(izistep): .long 0 //------------------------------------------------------- // local variables for d_draw16.s //------------------------------------------------------- -.globl reciprocal_table_16 +.globl C(reciprocal_table_16) // 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13, // 1/14, and 1/15 in 0.32 form -reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000 +C(reciprocal_table_16): .long 0x40000000, 0x2aaaaaaa, 0x20000000 .long 0x19999999, 0x15555555, 0x12492492 .long 0x10000000, 0xe38e38e, 0xccccccc, 0xba2e8ba .long 0xaaaaaaa, 0x9d89d89, 0x9249249, 0x8888888 @@ -136,68 +136,70 @@ reciprocal_table_16: .long 0x40000000, 0x2aaaaaaa, 0x20000000 //------------------------------------------------------- // local variables for d_parta.s //------------------------------------------------------- -.globl DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix, DP_EntryTable -DP_Count: .long 0 -DP_u: .long 0 -DP_v: .long 0 -DP_32768: .single 32768.0 -DP_Color: .long 0 -DP_Pix: .long 0 +.globl C(DP_Count), C(DP_u), C(DP_v), C(DP_32768), C(DP_Color) +.globl C(DP_Pix), C(DP_EntryTable) +C(DP_Count): .long 0 +C(DP_u): .long 0 +C(DP_v): .long 0 +C(DP_32768): .single 32768.0 +C(DP_Color): .long 0 +C(DP_Pix): .long 0 #ifndef NeXT - .extern DP_1x1 - .extern DP_2x2 - .extern DP_3x3 - .extern DP_4x4 + .extern C(DP_1x1) + .extern C(DP_2x2) + .extern C(DP_3x3) + .extern C(DP_4x4) #endif -DP_EntryTable: .long DP_1x1, DP_2x2, DP_3x3, DP_4x4 +C(DP_EntryTable): .long C(DP_1x1), C(DP_2x2), C(DP_3x3), C(DP_4x4) // // advancetable is 8 bytes, but points to the middle of that range so negative // offsets will work // -.globl advancetable, sstep, tstep, pspantemp, counttemp, jumptemp -advancetable: .long 0, 0 -sstep: .long 0 -tstep: .long 0 +.globl C(advancetable), C(sstep), C(tstep), C(pspantemp) +.globl C(counttemp), C(jumptemp) +C(advancetable): .long 0, 0 +C(sstep): .long 0 +C(tstep): .long 0 -pspantemp: .long 0 -counttemp: .long 0 -jumptemp: .long 0 +C(pspantemp): .long 0 +C(counttemp): .long 0 +C(jumptemp): .long 0 // 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form -.globl reciprocal_table, entryvec_table -reciprocal_table: .long 0x40000000, 0x2aaaaaaa, 0x20000000 +.globl C(reciprocal_table), C(entryvec_table) +C(reciprocal_table): .long 0x40000000, 0x2aaaaaaa, 0x20000000 .long 0x19999999, 0x15555555, 0x12492492 #ifndef NeXT - .extern Entry2_8 - .extern Entry3_8 - .extern Entry4_8 - .extern Entry5_8 - .extern Entry6_8 - .extern Entry7_8 - .extern Entry8_8 + .extern C(Entry2_8) + .extern C(Entry3_8) + .extern C(Entry4_8) + .extern C(Entry5_8) + .extern C(Entry6_8) + .extern C(Entry7_8) + .extern C(Entry8_8) #endif -entryvec_table: .long 0, Entry2_8, Entry3_8, Entry4_8 - .long Entry5_8, Entry6_8, Entry7_8, Entry8_8 +C(entryvec_table): .long 0, C(Entry2_8), C(Entry3_8), C(Entry4_8) + .long C(Entry5_8), C(Entry6_8), C(Entry7_8), C(Entry8_8) #ifndef NeXT - .extern Spr8Entry2_8 - .extern Spr8Entry3_8 - .extern Spr8Entry4_8 - .extern Spr8Entry5_8 - .extern Spr8Entry6_8 - .extern Spr8Entry7_8 - .extern Spr8Entry8_8 + .extern C(Spr8Entry2_8) + .extern C(Spr8Entry3_8) + .extern C(Spr8Entry4_8) + .extern C(Spr8Entry5_8) + .extern C(Spr8Entry6_8) + .extern C(Spr8Entry7_8) + .extern C(Spr8Entry8_8) #endif -.globl spr8entryvec_table -spr8entryvec_table: .long 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8 - .long Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8 +.globl C(spr8entryvec_table) +C(spr8entryvec_table): .long 0, C(Spr8Entry2_8), C(Spr8Entry3_8), C(Spr8Entry4_8) + .long C(Spr8Entry5_8), C(Spr8Entry6_8), C(Spr8Entry7_8), C(Spr8Entry8_8) #endif // USE_INTEL_ASM diff --git a/libs/video/renderer/sw/fpua.S b/libs/video/renderer/sw/fpua.S index 203ffcd0a..bfe166ee4 100644 --- a/libs/video/renderer/sw/fpua.S +++ b/libs/video/renderer/sw/fpua.S @@ -97,30 +97,30 @@ F_END(unmaskexceptions) .data .align 4 -.globl ceil_cw, single_cw, full_cw, cw, pushed_cw -ceil_cw: .long 0 -single_cw: .long 0 -full_cw: .long 0 -cw: .long 0 -pushed_cw: .long 0 +.globl C(ceil_cw), C(single_cw), C(full_cw), C(cw), C(pushed_cw) +C(ceil_cw): .long 0 +C(single_cw): .long 0 +C(full_cw): .long 0 +C(cw): .long 0 +C(pushed_cw): .long 0 #ifdef PIC -.type ceil_cw,@object -.type single_cw,@object -.type full_cw,@object -.type cw,@object -.type pushed_cw,@object -.size ceil_cw,4 -.size single_cw,4 -.size full_cw,4 -.size cw,4 -.size pushed_cw,4 +.type C(ceil_cw),@object +.type C(single_cw),@object +.type C(full_cw),@object +.type C(cw),@object +.type C(pushed_cw),@object +.size C(ceil_cw),4 +.size C(single_cw),4 +.size C(full_cw),4 +.size C(cw),4 +.size C(pushed_cw),4 #endif .text F_BEGIN(R_LowFPPrecision) got_base(3) - fldcw got_var(single_cw) + fldcw got_var(C(single_cw)) ret F_END(R_LowFPPrecision) @@ -128,7 +128,7 @@ F_END(R_LowFPPrecision) F_BEGIN(R_HighFPPrecision) got_base(4) - fldcw got_var(full_cw) + fldcw got_var(C(full_cw)) ret F_END(R_HighFPPrecision) @@ -136,19 +136,19 @@ F_END(R_HighFPPrecision) F_BEGIN(R_SetFPCW) got_base(7) - fnstcw got_var(cw) - movl got_var(cw),%eax + fnstcw got_var(C(cw)) + movl got_var(C(cw)),%eax andb $0xF0,%ah orb $0x03,%ah // round mode, 64-bit precision - movl %eax,got_var(full_cw) + movl %eax,got_var(C(full_cw)) andb $0xF0,%ah orb $0x0C,%ah // chop mode, single precision - movl %eax,got_var(single_cw) + movl %eax,got_var(C(single_cw)) andb $0xF0,%ah orb $0x08,%ah // ceil mode, single precision - movl %eax,got_var(ceil_cw) + movl %eax,got_var(C(ceil_cw)) ret F_END(R_SetFPCW) diff --git a/libs/video/renderer/sw/sw_raclipa.S b/libs/video/renderer/sw/sw_raclipa.S index 1a094b788..24ab9513e 100644 --- a/libs/video/renderer/sw/sw_raclipa.S +++ b/libs/video/renderer/sw/sw_raclipa.S @@ -68,11 +68,11 @@ C(R_Alias_clip_bottom): popl %eax addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic1],%eax - movl float_point5@GOTOFF(%eax),%edx + movl C(float_point5)@GOTOFF(%eax),%edx movl C(r_refdef)@GOT(%eax),%eax #else leal C(r_refdef),%eax - movl float_point5,%edx + movl C(float_point5),%edx #endif movl rd_aliasvrectbottom(%eax),%eax movl %edx,point5(%esp) @@ -209,11 +209,11 @@ C(R_Alias_clip_top): popl %eax addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic2],%eax - movl float_point5@GOTOFF(%eax),%edx + movl C(float_point5)@GOTOFF(%eax),%edx movl C(r_refdef)@GOT(%eax),%eax #else leal C(r_refdef),%eax - movl float_point5,%edx + movl C(float_point5),%edx #endif movl rd_aliasvrect+4(%eax),%eax movl %edx,point5(%esp) @@ -245,11 +245,11 @@ C(R_Alias_clip_right): popl %eax addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic3],%eax - movl float_point5@GOTOFF(%eax),%edx + movl C(float_point5)@GOTOFF(%eax),%edx movl C(r_refdef)@GOT(%eax),%eax #else leal C(r_refdef),%eax - movl float_point5,%edx + movl C(float_point5),%edx #endif movl rd_aliasvrectright(%eax),%eax movl %edx,point5(%esp) @@ -300,11 +300,11 @@ C(R_Alias_clip_left): popl %eax addl $C(_GLOBAL_OFFSET_TABLE_)-1+[.-.Lpic4],%eax - movl float_point5@GOTOFF(%eax),%edx + movl C(float_point5)@GOTOFF(%eax),%edx movl C(r_refdef)@GOT(%eax),%eax #else leal C(r_refdef),%eax - movl float_point5,%edx + movl C(float_point5),%edx #endif movl rd_aliasvrect+0(%eax),%eax movl %edx,point5(%esp) diff --git a/libs/video/renderer/sw/sw_rdrawa.S b/libs/video/renderer/sw/sw_rdrawa.S index d6ceaf211..8fdde816a 100644 --- a/libs/video/renderer/sw/sw_rdrawa.S +++ b/libs/video/renderer/sw/sw_rdrawa.S @@ -153,7 +153,7 @@ Lemit: // // FIXME: do away with by manually extracting integers from floats? // FIXME: set less often - fldcw r_ceil_cw + fldcw C(r_ceil_cw) // edge_t *edge, *pcheck; // int u_check; @@ -220,7 +220,7 @@ LCalcSecond: // r_ceilv1 = (int)(r_v1 - 2000) + 2000; // ceil(r_v1); fistl C(r_ceilv1) - fldcw r_single_cw // put back normal floating-point state + fldcw C(r_single_cw) // put back normal floating-point state fsts C(r_v1) fxch %st(4) // lzi0 | lzi1 | u1 | v0 | v1 | u0 @@ -398,11 +398,11 @@ LSideDone: // edge->u_step = u_step*0x100000; // edge->u = u*0x100000 + 0xFFFFF; - fmuls fp_1m // u*0x100000 | ustep + fmuls C(fp_1m) // u*0x100000 | ustep fxch %st(1) // ustep | u*0x100000 - fmuls fp_1m // ustep*0x100000 | u*0x100000 + fmuls C(fp_1m) // ustep*0x100000 | u*0x100000 fxch %st(1) // u*0x100000 | ustep*0x100000 - fadds fp_1m_minus_1 // u*0x100000 + 0xFFFFF | ustep*0x100000 + fadds C(fp_1m_minus_1) // u*0x100000 + 0xFFFFF | ustep*0x100000 fxch %st(1) // ustep*0x100000 | u*0x100000 + 0xFFFFF fistpl et_u_step(%edi) // u*0x100000 + 0xFFFFF fistpl et_u(%edi) @@ -784,7 +784,7 @@ LTransformAndProject: LNoClip: - fdivrs float_1 // lzi0 | x | y + fdivrs C(float_1) // lzi0 | x | y fxch %st(1) // x | lzi0 | y // // FIXME: build x/yscale into transform? diff --git a/libs/video/renderer/sw/sw_rvarsa.S b/libs/video/renderer/sw/sw_rvarsa.S index dbdbe0463..ca28a2e26 100644 --- a/libs/video/renderer/sw/sw_rvarsa.S +++ b/libs/video/renderer/sw/sw_rvarsa.S @@ -41,43 +41,43 @@ //------------------------------------------------------- // ASM-only variables //------------------------------------------------------- -.globl float_1, float_particle_z_clip, float_point5 -.globl float_minus_1, float_0 -float_0: .single 0.0 -float_1: .single 1.0 -float_minus_1: .single -1.0 -float_particle_z_clip: .single PARTICLE_Z_CLIP -float_point5: .single 0.5 +.globl C(float_1), C(float_particle_z_clip), C(float_point5) +.globl C(float_minus_1), C(float_0) +C(float_0): .single 0.0 +C(float_1): .single 1.0 +C(float_minus_1): .single -1.0 +C(float_particle_z_clip): .single PARTICLE_Z_CLIP +C(float_point5): .single 0.5 -.globl fp_16, fp_64k, fp_1m, fp_64kx64k -.globl fp_1m_minus_1 -.globl fp_8 -fp_1m: .single 1048576.0 -fp_1m_minus_1: .single 1048575.0 -fp_64k: .single 65536.0 -fp_8: .single 8.0 -fp_16: .single 16.0 -fp_64kx64k: .long 0x4f000000 // (float)0x8000*0x10000 +.globl C(fp_16), C(fp_64k), C(fp_1m), C(fp_64kx64k) +.globl C(fp_1m_minus_1) +.globl C(fp_8) +C(fp_1m): .single 1048576.0 +C(fp_1m_minus_1): .single 1048575.0 +C(fp_64k): .single 65536.0 +C(fp_8): .single 8.0 +C(fp_16): .single 16.0 +C(fp_64kx64k): .long 0x4f000000 // (float)0x8000*0x10000 -.globl FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd -FloatZero: .long 0 -Float2ToThe31nd: .long 0x4f000000 -FloatMinus2ToThe31nd: .long 0xcf000000 +.globl C(FloatZero), C(Float2ToThe31nd), C(FloatMinus2ToThe31nd) +C(FloatZero): .long 0 +C(Float2ToThe31nd): .long 0x4f000000 +C(FloatMinus2ToThe31nd): .long 0xcf000000 -.globl r_ceil_cw, r_single_cw -r_ceil_cw: .long 0 -r_single_cw: .long 0 +.globl C(r_ceil_cw), C(r_single_cw) +C(r_ceil_cw): .long 0 +C(r_single_cw): .long 0 .globl C(r_bmodelactive) C(r_bmodelactive): .long 0 .global C(R_InitVars) C(R_InitVars): - movl ceil_cw, %eax - movl single_cw, %edx - movl %eax, r_ceil_cw - movl %edx, r_single_cw + movl C(ceil_cw), %eax + movl C(single_cw), %edx + movl %eax, C(r_ceil_cw) + movl %edx, C(r_single_cw) ret #endif // USE_INTEL_ASM