; 2007-12-06 04:10:26 +00:00
; vim:filetype=nasm ts=8
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Pull in the project's NASM helper header.  The data_section / code_section /
; cextern / cglobal / cident names used in this file are not defined here;
; presumably they are macros supplied by this header -- confirm in ia32/nasm.h.
%include "ia32/nasm.h"

        data_section

; External symbols referenced by the routine below.
cextern FLAC__crc16_table               ; unsigned FLAC__crc16_table[256];
cextern bitreader_read_from_client_     ; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);

; Symbol exported by this file.
cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

        code_section
; **********************************************************************
;
; FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
;
; Reads 'nvals' Rice-coded values from the bit reader 'br' and stores them,
; as signed integers, in vals[0..nvals-1].  Each value consists of
;   - a unary part: a run of 0 bits terminated by a 1 "stop" bit, then
;   - a binary part: 'parameter' further bits (skipped when parameter == 0),
; which are combined into 'uval' and folded to signed form as
;   (uval >> 1) ^ -(int)(uval & 1).
;
; Whenever a buffer word is completely consumed, br->read_crc is updated with
; that word (honouring br->crc16_align) and br->crc16_align is reset to 0.
; When the buffer runs out, bitreader_read_from_client_(br) is called to
; refill it.
;
; In:    arguments on the stack (see the [esp + n] key below)
; Out:   eax = 1 on success, 0 if bitreader_read_from_client_() returned 0
; Saves: ebp, ebx, esi, edi (pushed on entry, popped at .end)
; Stack: 4 bytes of local storage ('ucbits', the count of unconsumed bits)
;
; Some details like assertions and other checking are performed by the caller.
        ALIGN 16
cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

        ;ASSERT(0 != br);
        ;ASSERT(0 != br->buffer);
        ; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion
        ;ASSERT(FLAC__BITS_PER_WORD == 32);
        ;ASSERT(parameter < 32);
        ; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it

        ;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
        ;; [esp + 16]   unsigned parameter
        ;; [esp + 12]   unsigned nvals
        ;; [esp + 8]    int vals[]
        ;; [esp + 4]    FLAC__BitReader *br
        mov     eax, [esp + 12]         ; if(nvals == 0)
        test    eax, eax
        ja      .nvals_gt_0             ; (test clears CF, so 'ja' is taken exactly when nvals != 0)
        mov     eax, 1                  ;   return true;
        ret

.nvals_gt_0:
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp, 4                  ; reserve the 'ucbits' local
        ;; [esp + 36]   unsigned parameter
        ;; [esp + 32]   unsigned nvals
        ;; [esp + 28]   int vals[]
        ;; [esp + 24]   FLAC__BitReader *br
        ;; [esp]        ucbits
        mov     ebp, [esp + 24]         ; ebp <- br (br->buffer is then at [ebp])
        mov     esi, [ebp + 16]         ; esi <- br->consumed_words (aka 'cwords' in the C version)
        mov     ecx, [ebp + 20]         ; ecx <- br->consumed_bits  (aka 'cbits'  in the C version)
        xor     edi, edi                ; edi <- 0  'uval'
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        ;; [ebp]        br->buffer
        ;; [ebp + 8]    br->words
        ;; [ebp + 12]   br->bytes
        ;; [ebp + 16]   br->consumed_words
        ;; [ebp + 20]   br->consumed_bits
        ;; [ebp + 24]   br->read_crc
        ;; [ebp + 28]   br->crc16_align

                                        ; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
        mov     eax, [ebp + 8]          ;   eax <- br->words
        sub     eax, esi                ;   eax <- br->words-cwords
        shl     eax, 2                  ;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
        add     eax, [ebp + 12]         ;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
        shl     eax, 3                  ;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
        sub     eax, ecx                ;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
        mov     [esp], eax              ;   ucbits <- eax

        ALIGN 16
.val_loop:                              ; while(1) {

        ;
        ; read unary part
        ;
.unary_loop:                            ;   while(1) {
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        cmp     esi, [ebp + 8]          ;     while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
        jae     near .c1_next1
.c1_loop:                               ;     {
        mov     ebx, [ebp]
        mov     eax, [ebx + 4*esi]      ;       b = br->buffer[cwords]
        mov     edx, eax                ;       edx = br->buffer[cwords] (saved for later use)
        shl     eax, cl                 ;       b = br->buffer[cwords] << cbits
        test    eax, eax                ;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
        jz      near .c1_next2          ;       if(b) {
        bsr     ebx, eax
        not     ebx
        and     ebx, 31                 ;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
        add     ecx, ebx                ;         cbits += i;
        add     edi, ebx                ;         uval += i;
        add     ecx, byte 1             ;         cbits++; /* skip over stop bit */
        test    ecx, ~31
        jz      near .break1            ;         if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
                                        ;           crc16_update_word_(br, br->buffer[cwords]);
        push    edi                     ;               [need more registers]
        bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
        mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
        mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        mov     edi, _FLAC__crc16_table
%else
        mov     edi, FLAC__crc16_table
%endif
        ;; eax (ax)     crc a.k.a. br->read_crc
        ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
        ;; ecx          br->crc16_align
        ;; edx          byteswapped brword to CRC
        ;; esi          cwords
        ;; edi          unsigned FLAC__crc16_table[]
        ;; ebp          br
        test    ecx, ecx                ;               switch(br->crc16_align) ...
        jnz     .c0b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c0b0:  xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c0b1:  xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shr     edx, 16
.c0b2:  xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c0b3:  xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
        movzx   eax, ax                 ;               keep only the 16-bit CRC
        mov     [ebp + 24], eax         ;               br->read_crc <- crc
        pop     edi

        add     esi, byte 1             ;           cwords++;
        xor     ecx, ecx                ;           cbits = 0;
                                        ;         }
        jmp     near .break1            ;         goto break1;

        ;; this section relocated out of the way for performance
        ;; (crc16_align != 0: clear it, then skip the bytes that precede the alignment point)
.c0b4:
        mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
        cmp     ecx, 8
        je      .c0b1
        shr     edx, 16
        cmp     ecx, 16
        je      .c0b2
        jmp     .c0b3

        ;; this section relocated out of the way for performance
        ;; (same as .c0b4, for the CRC update under .c1_next2)
.c1b4:
        mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
        cmp     ecx, 8
        je      .c1b1
        shr     edx, 16
        cmp     ecx, 16
        je      .c1b2
        jmp     .c1b3

.c1_next2:                              ;       } else {
        ;; ecx          cbits
        ;; edx          current brword 'b'
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        add     edi, 32
        sub     edi, ecx                ;         uval += FLAC__BITS_PER_WORD - cbits;
                                        ;         crc16_update_word_(br, br->buffer[cwords]);
        push    edi                     ;               [need more registers]
        bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
        mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
        mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        mov     edi, _FLAC__crc16_table
%else
        mov     edi, FLAC__crc16_table
%endif
        ;; eax (ax)     crc a.k.a. br->read_crc
        ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
        ;; ecx          br->crc16_align
        ;; edx          byteswapped brword to CRC
        ;; esi          cwords
        ;; edi          unsigned FLAC__crc16_table[]
        ;; ebp          br
        test    ecx, ecx                ;               switch(br->crc16_align) ...
        jnz     .c1b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c1b0:  xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c1b1:  xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shr     edx, 16
.c1b2:  xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c1b3:  xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
        movzx   eax, ax                 ;               keep only the 16-bit CRC
        mov     [ebp + 24], eax         ;               br->read_crc <- crc
        pop     edi

        add     esi, byte 1             ;         cwords++;
        xor     ecx, ecx                ;         cbits = 0;
                                        ;         /* didn't find stop bit yet, have to keep going... */
                                        ;       }

        cmp     esi, [ebp + 8]          ;     } while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
        jb      near .c1_loop

.c1_next1:
        ; at this point we've eaten up all the whole words; have to try
        ; reading through any tail bytes before calling the read callback.
        ; this is a repeat of the above logic adjusted for the fact we
        ; don't have a whole word.  note though if the client is feeding
        ; us data a byte at a time (unlikely), br->consumed_bits may not
        ; be zero.
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        mov     edx, [ebp + 12]         ;     edx <- br->bytes
        test    edx, edx
        jz      .read1                  ;     if(br->bytes) {  [NOTE: this case is rare so it doesn't have to be all that fast ]
        mov     ebx, [ebp]
        shl     edx, 3                  ;       edx <- const unsigned end = br->bytes * 8;
        mov     eax, [ebx + 4*esi]      ;       b = br->buffer[cwords]
        xchg    edx, ecx                ;       [edx <- cbits , ecx <- end]
        mov     ebx, 0xffffffff         ;       ebx <- FLAC__WORD_ALL_ONES
        shr     ebx, cl                 ;       ebx <- FLAC__WORD_ALL_ONES >> end
        not     ebx                     ;       ebx <- ~(FLAC__WORD_ALL_ONES >> end)
        xchg    edx, ecx                ;       [edx <- end , ecx <- cbits]
        and     eax, ebx                ;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
        shl     eax, cl                 ;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
        test    eax, eax                ;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
        jz      .c1_next3               ;       if(b) {
        bsr     ebx, eax
        not     ebx
        and     ebx, 31                 ;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
        add     ecx, ebx                ;         cbits += i;
        add     edi, ebx                ;         uval += i;
        add     ecx, byte 1             ;         cbits++; /* skip over stop bit */
        jmp     short .break1           ;         goto break1;
.c1_next3:                              ;       } else {
        sub     edi, ecx
        add     edi, edx                ;         uval += end - cbits;
        mov     ecx, edx                ;         cbits = end;
                                        ;           [every valid tail bit has now been consumed, so the
                                        ;            position is exactly 'end'; adding 'end' to a
                                        ;            non-zero cbits would overshoot the tail]
                                        ;         /* didn't find stop bit yet, have to keep going... */
                                        ;       }
                                        ;     }
.read1:
        ; flush registers and read; bitreader_read_from_client_() does
        ; not touch br->consumed_bits at all but we still need to set
        ; it in case it fails and we have to return false.
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        mov     [ebp + 16], esi         ;     br->consumed_words = cwords;
        mov     [ebp + 20], ecx         ;     br->consumed_bits = cbits;
        push    ecx                     ;     /* save */
        push    ebp                     ;     /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        call    _bitreader_read_from_client_
%else
        call    bitreader_read_from_client_
%endif
        pop     edx                     ;     /* discard, unused */
        pop     ecx                     ;     /* restore */
        mov     esi, [ebp + 16]         ;     cwords = br->consumed_words;
                                        ;     ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
        mov     ebx, [ebp + 8]          ;       ebx <- br->words
        sub     ebx, esi                ;       ebx <- br->words-cwords
        shl     ebx, 2                  ;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
        add     ebx, [ebp + 12]         ;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
        shl     ebx, 3                  ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
        sub     ebx, ecx                ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
        add     ebx, edi                ;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
                                        ;           + uval to offset our count by the # of unary bits already
                                        ;           consumed before the read, because we will add these back
                                        ;           in all at once at break1
        mov     [esp], ebx              ;       ucbits <- ebx
        test    eax, eax                ;     if(!bitreader_read_from_client_(br))
        jnz     near .unary_loop
        jmp     .end                    ;       return false; /* eax (the return value) is already 0 */
                                        ;   } /* end while(1) unary part */

        ALIGN 16
.break1:
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        ;; [esp]        ucbits
        sub     [esp], edi              ;   ucbits -= uval;
        sub     dword [esp], byte 1     ;   ucbits--; /* account for stop bit */

        ;
        ; read binary part
        ;
        mov     ebx, [esp + 36]         ;   ebx <- parameter
        test    ebx, ebx                ;   if(parameter) {
        jz      near .break2
.read2:
        cmp     [esp], ebx              ;     while(ucbits < parameter) {
        jae     .c2_next1
        ; flush registers and read; bitreader_read_from_client_() does
        ; not touch br->consumed_bits at all but we still need to set
        ; it in case it fails and we have to return false.
        mov     [ebp + 16], esi         ;       br->consumed_words = cwords;
        mov     [ebp + 20], ecx         ;       br->consumed_bits = cbits;
        push    ecx                     ;       /* save */
        push    ebp                     ;       /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        call    _bitreader_read_from_client_
%else
        call    bitreader_read_from_client_
%endif
        pop     edx                     ;       /* discard, unused */
        pop     ecx                     ;       /* restore */
        mov     esi, [ebp + 16]         ;       cwords = br->consumed_words;
                                        ;       ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
        mov     edx, [ebp + 8]          ;         edx <- br->words
        sub     edx, esi                ;         edx <- br->words-cwords
        shl     edx, 2                  ;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
        add     edx, [ebp + 12]         ;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
        shl     edx, 3                  ;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
        sub     edx, ecx                ;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
        mov     [esp], edx              ;         ucbits <- edx
        test    eax, eax                ;       if(!bitreader_read_from_client_(br))
        jnz     .read2
        jmp     .end                    ;         return false; /* eax (the return value) is already 0 */
                                        ;     }
.c2_next1:
        ;; ebx          parameter
        ;; ecx          cbits
        ;; esi          cwords
        ;; edi          uval
        ;; ebp          br
        ;; [esp]        ucbits
        cmp     esi, [ebp + 8]          ;     if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
        jae     near .c2_next2
        test    ecx, ecx                ;       if(cbits) {
        jz      near .c2_next3          ;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
        mov     eax, 32
        mov     edx, [ebp]
        sub     eax, ecx                ;         const unsigned n = FLAC__BITS_PER_WORD - cbits;
        mov     edx, [edx + 4*esi]      ;         const brword word = br->buffer[cwords];
        cmp     ebx, eax                ;         if(parameter < n) {
        jae     .c2_next4
                                        ;           uval <<= parameter;
                                        ;           uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
        shl     edx, cl
        xchg    ebx, ecx                ;           [ebx <- cbits, ecx <- parameter]
        shld    edi, edx, cl
        add     ebx, ecx                ;           cbits += parameter;
        xchg    ebx, ecx                ;           ebx <- parameter, ecx <- cbits
        jmp     .break2                 ;           goto break2;
                                        ;         }
.c2_next4:
                                        ;         uval <<= n;
                                        ;         uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
%if 1
        rol     edx, cl                 ;            @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
                                        ;            @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
%else
        shl     edx, cl
%endif
        xchg    eax, ecx                ;         [eax <- cbits, ecx <- n]
        shld    edi, edx, cl
        xchg    eax, ecx                ;         [eax <- n, ecx <- cbits]
%if 1
        ror     edx, cl                 ;         restored.
%else
        mov     edx, [ebp]
        mov     edx, [edx + 4*esi]
%endif
                                        ;         crc16_update_word_(br, br->buffer[cwords]);
        push    edi                     ;               [need more registers]
        push    ebx                     ;               [need more registers]
        push    eax                     ;               [need more registers]
        bswap   edx                     ;               edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
        mov     ecx, [ebp + 28]         ;               ecx <- br->crc16_align
        mov     eax, [ebp + 24]         ;               ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
        mov     edi, _FLAC__crc16_table
%else
        mov     edi, FLAC__crc16_table
%endif
        ;; eax (ax)     crc a.k.a. br->read_crc
        ;; ebx (bl)     intermediate result index into FLAC__crc16_table[]
        ;; ecx          br->crc16_align
        ;; edx          byteswapped brword to CRC
        ;; esi          cwords
        ;; edi          unsigned FLAC__crc16_table[]
        ;; ebp          br
        test    ecx, ecx                ;               switch(br->crc16_align) ...
        jnz     .c2b4                   ;               [br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c2b0:  xor     dl, ah                  ;               dl <- (crc>>8)^(word>>24)
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c2b1:  xor     dh, ah                  ;               dh <- (crc>>8)^((word>>16)&0xff))
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
        shr     edx, 16
.c2b2:  xor     dl, ah                  ;               dl <- (crc>>8)^((word>>8)&0xff))
        movzx   ebx, dl
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c2b3:  xor     dh, ah                  ;               dh <- (crc>>8)^(word&0xff)
        movzx   ebx, dh
        mov     ecx, [ebx*4 + edi]      ;               cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
        shl     eax, 8                  ;               ax <- (crc<<8)
        xor     eax, ecx                ;               crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
        movzx   eax, ax                 ;               keep only the 16-bit CRC
        mov     [ebp + 24], eax         ;               br->read_crc <- crc
        pop     eax                     ;               [eax <- n]
        pop     ebx                     ;               [ebx <- parameter]
        pop     edi                     ;               [edi <- uval]
        add     esi, byte 1             ;         cwords++;
        mov     ecx, ebx
        sub     ecx, eax                ;         cbits = parameter - n;
        jz      .break2                 ;         if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
                                        ;           uval <<= cbits;
                                        ;           uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
        mov     eax, [ebp]
        mov     eax, [eax + 4*esi]
        shld    edi, eax, cl
                                        ;         }
        jmp     .break2                 ;         goto break2;

        ;; this section relocated out of the way for performance
        ;; (same as .c0b4, for the CRC update under .c2_next4)
.c2b4:
        mov     [ebp + 28], dword 0     ;               br->crc16_align <- 0
        cmp     ecx, 8
        je      .c2b1
        shr     edx, 16
        cmp     ecx, 16
        je      .c2b2
        jmp     .c2b3

.c2_next3:                              ;       } else {
        mov     ecx, ebx                ;         cbits = parameter;
                                        ;         uval <<= cbits;
                                        ;         uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
        mov     eax, [ebp]
        mov     eax, [eax + 4*esi]
        shld    edi, eax, cl
        jmp     .break2                 ;         goto break2;
                                        ;       }
.c2_next2:                              ;     } else {
        ; in this case we're starting our read at a partial tail word;
        ; the reader has guaranteed that we have at least 'parameter'
        ; bits available to read, which makes this case simpler.
        ; uval <<= parameter;
        ; if(cbits) {
        ;   /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
        ;   uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
        ;   cbits += parameter;
        ;   goto break2;
        ; } else {
        ;   cbits = parameter;
        ;   uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
        ;   goto break2;
        ; }
        ; the above is much shorter in assembly:
        mov     eax, [ebp]
        mov     eax, [eax + 4*esi]      ;       eax <- br->buffer[cwords]
        shl     eax, cl                 ;       eax <- br->buffer[cwords] << cbits
        add     ecx, ebx                ;       cbits += parameter
        xchg    ebx, ecx                ;       ebx <- cbits, ecx <- parameter
        shld    edi, eax, cl            ;       uval <<= parameter <<< 'parameter' bits of tail word
        xchg    ebx, ecx                ;       ebx <- parameter, ecx <- cbits
                                        ;     }
                                        ;   }
.break2:
        sub     [esp], ebx              ;   ucbits -= parameter;

        ;
        ; compose the value
        ;
        mov     ebx, [esp + 28]         ;   ebx <- vals
        mov     edx, edi                ;   edx <- uval
        and     edi, 1                  ;   edi <- uval & 1
        shr     edx, 1                  ;   edx <- uval >> 1
        neg     edi                     ;   edi <- -(int)(uval & 1)
        xor     edx, edi                ;   edx <- (uval >> 1 ^ -(int)(uval & 1))
        mov     [ebx], edx              ;   *vals <- edx
        sub     dword [esp + 32], byte 1 ;  --nvals;
        jz      .finished               ;   if(nvals == 0) /* jump to finish */
        xor     edi, edi                ;   uval = 0;
        add     dword [esp + 28], 4     ;   ++vals
        jmp     .val_loop               ; }

.finished:
        mov     [ebp + 16], esi         ; br->consumed_words = cwords;
        mov     [ebp + 20], ecx         ; br->consumed_bits = cbits;
        mov     eax, 1                  ; return true
.end:
        add     esp, 4                  ; drop the 'ucbits' local
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
; 2007-12-11 02:38:38 +00:00   (looks like a leftover revision timestamp; not valid NASM, so kept only as a comment)
end:
; 2007-12-06 04:10:26 +00:00   (looks like a leftover revision timestamp; not valid NASM, so kept only as a comment)

; For ELF output, emit an empty .note.GNU-stack section.  GNU toolchains
; conventionally read this as "this object does not need an executable stack".
%ifdef OBJ_FORMAT_elf
        section .note.GNU-stack noalloc
%endif