diff --git a/FLAC/FLAC++/decoder.h b/FLAC/FLAC++/decoder.h
index 57a6ee650c..e074223d89 100644
--- a/FLAC/FLAC++/decoder.h
+++ b/FLAC/FLAC++/decoder.h
@@ -1,355 +1,245 @@
-/* libFLAC++ - Free Lossless Audio Codec library
- * Copyright (C) 2002,2003,2004,2005  Josh Coalson
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Xiph.org Foundation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FLACPP__DECODER_H
-#define FLACPP__DECODER_H
-
-#include "export.h"
-
-// [RH] ZDoom doesn't need these
-//#include "FLAC/file_decoder.h"
-//#include "FLAC/seekable_stream_decoder.h"
-#include "FLAC/stream_decoder.h"
-
-
-/** \file include/FLAC++/decoder.h
- *
- *  \brief
- *  This module contains the classes which implement the various
- *  decoders.
- *
- *  See the detailed documentation in the
- *  \link flacpp_decoder decoder \endlink module.
- */
-
-/** \defgroup flacpp_decoder FLAC++/decoder.h: decoder classes
- *  \ingroup flacpp
- *
- *  \brief
- *  This module describes the three decoder layers provided by libFLAC++.
- *
- * The libFLAC++ decoder classes are object wrappers around their
- * counterparts in libFLAC.  All three decoding layers available in
- * libFLAC are also provided here.  The interface is very similar;
- * make sure to read the \link flac_decoder libFLAC decoder module \endlink.
- *
- * The only real difference here is that instead of passing in C function
- * pointers for callbacks, you inherit from the decoder class and provide
- * implementations for the callbacks in the derived class; because of this
- * there is no need for a 'client_data' property.
- */
-
-namespace FLAC {
-	namespace Decoder {
-
-		// ============================================================
-		//
-		//  Equivalent: FLAC__StreamDecoder
-		//
-		// ============================================================
-
-		/** \defgroup flacpp_stream_decoder FLAC++/decoder.h: stream decoder class
-		 *  \ingroup flacpp_decoder
-		 *
-		 *  \brief
-		 *  This class wraps the ::FLAC__StreamDecoder.
-		 *
-		 * See the \link flac_stream_decoder libFLAC stream decoder module \endlink.
-		 *
-		 * \{
-		 */
-
-		/** This class wraps the ::FLAC__StreamDecoder.
-		 */
-		class FLACPP_API Stream {
-		public:
-			class FLACPP_API State {
-			public:
-				inline State(::FLAC__StreamDecoderState state): state_(state) { }
-				inline operator ::FLAC__StreamDecoderState() const { return state_; }
-				inline const char *as_cstring() const { return ::FLAC__StreamDecoderStateString[state_]; }
-				inline const char *resolved_as_cstring(const Stream &decoder) const { return ::FLAC__stream_decoder_get_resolved_state_string(decoder.decoder_); }
-			protected:
-				::FLAC__StreamDecoderState state_;
-			};
-
-			Stream();
-			virtual ~Stream();
-
-			bool is_valid() const;
-			inline operator bool() const { return is_valid(); }
-
-			bool set_metadata_respond(::FLAC__MetadataType type);
-			bool set_metadata_respond_application(const FLAC__byte id[4]);
-			bool set_metadata_respond_all();
-			bool set_metadata_ignore(::FLAC__MetadataType type);
-			bool set_metadata_ignore_application(const FLAC__byte id[4]);
-			bool set_metadata_ignore_all();
-
-			State get_state() const;
-			unsigned get_channels() const;
-			::FLAC__ChannelAssignment get_channel_assignment() const;
-			unsigned get_bits_per_sample() const;
-			unsigned get_sample_rate() const;
-			unsigned get_blocksize() const;
-
-			/** Initialize the instance; as with the C interface,
-			 *  init() should be called after construction and 'set'
-			 *  calls but before any of the 'process' calls.
-			 */
-			State init();
-
-			void finish();
-
-			bool flush();
-			bool reset();
-
-			bool process_single();
-			bool process_until_end_of_metadata();
-			bool process_until_end_of_stream();
-			bool skip_single_frame();
-		protected:
-			virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], unsigned *bytes) = 0;
-			virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]) = 0;
-			virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata) = 0;
-			virtual void error_callback(::FLAC__StreamDecoderErrorStatus status) = 0;
-
-#if (defined _MSC_VER) || (defined __GNUG__ && (__GNUG__ < 2 || (__GNUG__ == 2 && __GNUC_MINOR__ < 96))) || (defined __SUNPRO_CC)
-			// lame hack: some MSVC/GCC versions can't see a protected decoder_ from nested State::resolved_as_cstring()
-			friend State;
-#endif
-			::FLAC__StreamDecoder *decoder_;
-		private:
-			static ::FLAC__StreamDecoderReadStatus read_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__byte buffer[], unsigned *bytes, void *client_data);
-			static ::FLAC__StreamDecoderWriteStatus write_callback_(const ::FLAC__StreamDecoder *decoder, const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data);
-			static void metadata_callback_(const ::FLAC__StreamDecoder *decoder, const ::FLAC__StreamMetadata *metadata, void *client_data);
-			static void error_callback_(const ::FLAC__StreamDecoder *decoder, ::FLAC__StreamDecoderErrorStatus status, void *client_data);
-
-			// Private and undefined so you can't use them:
-			Stream(const Stream &);
-			void operator=(const Stream &);
-		};
-
-		/* \} */
-
-#if 0	// [RH] Don't need these for ZDoom
-		// ============================================================
-		//
-		//  Equivalent: FLAC__SeekableStreamDecoder
-		//
-		// ============================================================
-
-		/** \defgroup flacpp_seekable_stream_decoder FLAC++/decoder.h: seekable stream decoder class
-		 *  \ingroup flacpp_decoder
-		 *
-		 *  \brief
-		 *  This class wraps the ::FLAC__SeekableStreamDecoder.
-		 *
-		 * See the \link flac_seekable_stream_decoder libFLAC seekable stream decoder module \endlink.
-		 *
-		 * \{
-		 */
-
-		/** This class wraps the ::FLAC__SeekableStreamDecoder.
-		 */
-		class FLACPP_API SeekableStream {
-		public:
-			class FLACPP_API State {
-			public:
-				inline State(::FLAC__SeekableStreamDecoderState state): state_(state) { }
-				inline operator ::FLAC__SeekableStreamDecoderState() const { return state_; }
-				inline const char *as_cstring() const { return ::FLAC__SeekableStreamDecoderStateString[state_]; }
-				inline const char *resolved_as_cstring(const SeekableStream &decoder) const { return ::FLAC__seekable_stream_decoder_get_resolved_state_string(decoder.decoder_); }
-			protected:
-				::FLAC__SeekableStreamDecoderState state_;
-			};
-
-			SeekableStream();
-			virtual ~SeekableStream();
-
-			bool is_valid() const;
-			inline operator bool() const { return is_valid(); }
-
-			bool set_md5_checking(bool value);
-			bool set_metadata_respond(::FLAC__MetadataType type);
-			bool set_metadata_respond_application(const FLAC__byte id[4]);
-			bool set_metadata_respond_all();
-			bool set_metadata_ignore(::FLAC__MetadataType type);
-			bool set_metadata_ignore_application(const FLAC__byte id[4]);
-			bool set_metadata_ignore_all();
-
-			State get_state() const;
-			Stream::State get_stream_decoder_state() const;
-			bool get_md5_checking() const;
-			unsigned get_channels() const;
-			::FLAC__ChannelAssignment get_channel_assignment() const;
-			unsigned get_bits_per_sample() const;
-			unsigned get_sample_rate() const;
-			unsigned get_blocksize() const;
-
-			State init();
-
-			bool finish();
-
-			bool flush();
-			bool reset();
-
-			bool process_single();
-			bool process_until_end_of_metadata();
-			bool process_until_end_of_stream();
-			bool skip_single_frame();
-
-			bool seek_absolute(FLAC__uint64 sample);
-		protected:
-			virtual ::FLAC__SeekableStreamDecoderReadStatus read_callback(FLAC__byte buffer[], unsigned *bytes) = 0;
-			virtual ::FLAC__SeekableStreamDecoderSeekStatus seek_callback(FLAC__uint64 absolute_byte_offset) = 0;
-			virtual ::FLAC__SeekableStreamDecoderTellStatus tell_callback(FLAC__uint64 *absolute_byte_offset) = 0;
-			virtual ::FLAC__SeekableStreamDecoderLengthStatus length_callback(FLAC__uint64 *stream_length) = 0;
-			virtual bool eof_callback() = 0;
-			virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]) = 0;
-			virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata) = 0;
-			virtual void error_callback(::FLAC__StreamDecoderErrorStatus status) = 0;
-
-#if (defined _MSC_VER) || (defined __GNUG__ && (__GNUG__ < 2 || (__GNUG__ == 2 && __GNUC_MINOR__ < 96))) || (defined __SUNPRO_CC)
-			// lame hack: some MSVC/GCC versions can't see a protected decoder_ from nested State::resolved_as_cstring()
-			friend State;
-#endif
-			::FLAC__SeekableStreamDecoder *decoder_;
-		private:
-			static ::FLAC__SeekableStreamDecoderReadStatus read_callback_(const ::FLAC__SeekableStreamDecoder *decoder, FLAC__byte buffer[], unsigned *bytes, void *client_data);
-			static ::FLAC__SeekableStreamDecoderSeekStatus seek_callback_(const ::FLAC__SeekableStreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data);
-			static ::FLAC__SeekableStreamDecoderTellStatus tell_callback_(const ::FLAC__SeekableStreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data);
-			static ::FLAC__SeekableStreamDecoderLengthStatus length_callback_(const ::FLAC__SeekableStreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data);
-			static FLAC__bool eof_callback_(const ::FLAC__SeekableStreamDecoder *decoder, void *client_data);
-			static ::FLAC__StreamDecoderWriteStatus write_callback_(const ::FLAC__SeekableStreamDecoder *decoder, const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data);
-			static void metadata_callback_(const ::FLAC__SeekableStreamDecoder *decoder, const ::FLAC__StreamMetadata *metadata, void *client_data);
-			static void error_callback_(const ::FLAC__SeekableStreamDecoder *decoder, ::FLAC__StreamDecoderErrorStatus status, void *client_data);
-
-			// Private and undefined so you can't use them:
-			SeekableStream(const SeekableStream &);
-			void operator=(const SeekableStream &);
-		};
-
-		/* \} */
-
-		// ============================================================
-		//
-		//  Equivalent: FLAC__FileDecoder
-		//
-		// ============================================================
-
-		/** \defgroup flacpp_file_decoder FLAC++/decoder.h: file decoder class
-		 *  \ingroup flacpp_decoder
-		 *
-		 *  \brief
-		 *  This class wraps the ::FLAC__FileDecoder.
-		 *
-		 * See the \link flac_file_decoder libFLAC file decoder module \endlink.
-		 *
-		 * \{
-		 */
-
-		/** This class wraps the ::FLAC__FileDecoder.
-		 */
-		class FLACPP_API File {
-		public:
-			class FLACPP_API State {
-			public:
-				inline State(::FLAC__FileDecoderState state): state_(state) { }
-				inline operator ::FLAC__FileDecoderState() const { return state_; }
-				inline const char *as_cstring() const { return ::FLAC__FileDecoderStateString[state_]; }
-				inline const char *resolved_as_cstring(const File &decoder) const { return ::FLAC__file_decoder_get_resolved_state_string(decoder.decoder_); }
-			protected:
-				::FLAC__FileDecoderState state_;
-			};
-
-			File();
-			virtual ~File();
-
-			bool is_valid() const;
-			inline operator bool() const { return is_valid(); }
-
-			bool set_md5_checking(bool value);
-			bool set_filename(const char *value); //!< 'value' may not be \c NULL; use "-" for stdin
-			bool set_metadata_respond(::FLAC__MetadataType type);
-			bool set_metadata_respond_application(const FLAC__byte id[4]);
-			bool set_metadata_respond_all();
-			bool set_metadata_ignore(::FLAC__MetadataType type);
-			bool set_metadata_ignore_application(const FLAC__byte id[4]);
-			bool set_metadata_ignore_all();
-
-			State get_state() const;
-			SeekableStream::State get_seekable_stream_decoder_state() const;
-			Stream::State get_stream_decoder_state() const;
-			bool get_md5_checking() const;
-			unsigned get_channels() const;
-			::FLAC__ChannelAssignment get_channel_assignment() const;
-			unsigned get_bits_per_sample() const;
-			unsigned get_sample_rate() const;
-			unsigned get_blocksize() const;
-
-			State init();
-
-			bool finish();
-
-			bool process_single();
-			bool process_until_end_of_metadata();
-			bool process_until_end_of_file();
-			bool skip_single_frame();
-
-			bool seek_absolute(FLAC__uint64 sample);
-		protected:
-			virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]) = 0;
-			virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata) = 0;
-			virtual void error_callback(::FLAC__StreamDecoderErrorStatus status) = 0;
-
-#if (defined _MSC_VER) || (defined __GNUG__ && (__GNUG__ < 2 || (__GNUG__ == 2 && __GNUC_MINOR__ < 96))) || (defined __SUNPRO_CC)
-			// lame hack: some MSVC/GCC versions can't see a protected decoder_ from nested State::resolved_as_cstring()
-			friend State;
-#endif
-			::FLAC__FileDecoder *decoder_;
-		private:
-			static ::FLAC__StreamDecoderWriteStatus write_callback_(const ::FLAC__FileDecoder *decoder, const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data);
-			static void metadata_callback_(const ::FLAC__FileDecoder *decoder, const ::FLAC__StreamMetadata *metadata, void *client_data);
-			static void error_callback_(const ::FLAC__FileDecoder *decoder, ::FLAC__StreamDecoderErrorStatus status, void *client_data);
-
-			// Private and undefined so you can't use them:
-			File(const File &);
-			void operator=(const File &);
-		};
-
-		/* \} */
-#endif	// [RH]
-
-	}
-}
-
-#endif
+/* libFLAC++ - Free Lossless Audio Codec library
+ * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLACPP__DECODER_H
+#define FLACPP__DECODER_H
+
+#include "export.h"
+
+// [RH] ZDoom doesn't need these
+//#include "FLAC/file_decoder.h"
+//#include "FLAC/seekable_stream_decoder.h"
+#include "FLAC/stream_decoder.h"
+
+
+/** \file include/FLAC++/decoder.h
+ *
+ *  \brief
+ *  This module contains the classes which implement the various
+ *  decoders.
+ *
+ *  See the detailed documentation in the
+ *  \link flacpp_decoder decoder \endlink module.
+ */
+
+/** \defgroup flacpp_decoder FLAC++/decoder.h: decoder classes
+ *  \ingroup flacpp
+ *
+ *  \brief
+ *  This module describes the decoder layers provided by libFLAC++.
+ *
+ * The libFLAC++ decoder classes are object wrappers around their
+ * counterparts in libFLAC.  All decoding layers available in
+ * libFLAC are also provided here.  The interface is very similar;
+ * make sure to read the \link flac_decoder libFLAC decoder module \endlink.
+ *
+ * There are only two significant differences here.  First, instead of
+ * passing in C function pointers for callbacks, you inherit from the
+ * decoder class and provide implementations for the callbacks in your
+ * derived class; because of this there is no need for a 'client_data'
+ * property.
+ *
+ * Second, there are two stream decoder classes.  FLAC::Decoder::Stream
+ * is used for the same cases that FLAC__stream_decoder_init_stream() /
+ * FLAC__stream_decoder_init_ogg_stream() are used, and FLAC::Decoder::File
+ * is used for the same cases that
+ * FLAC__stream_decoder_init_FILE() and FLAC__stream_decoder_init_file() /
+ * FLAC__stream_decoder_init_ogg_FILE() and FLAC__stream_decoder_init_ogg_file()
+ * are used.
+ */
+
+namespace FLAC {
+	namespace Decoder {
+
+		/** \ingroup flacpp_decoder
+		 *  \brief
+		 *  This class wraps the ::FLAC__StreamDecoder.  If you are
+		 *  decoding from a file, FLAC::Decoder::File may be more
+		 *  convenient.
+		 *
+		 * The usage of this class is similar to FLAC__StreamDecoder,
+		 * except instead of providing callbacks to
+		 * FLAC__stream_decoder_init*_stream(), you will inherit from this
+		 * class and override the virtual callback functions with your
+		 * own implementations, then call init() or init_ogg().  The rest
+		 * of the calls work the same as in the C layer.
+		 *
+		 * Only the read, write, and error callbacks are mandatory.  The
+		 * others are optional; this class provides default
+		 * implementations that do nothing.  In order for seeking to work
+		 * you must override seek_callback(), tell_callback(),
+		 * length_callback(), and eof_callback().
+		 */
+		class FLACPP_API Stream {
+		public:
+			/** This class is a wrapper around FLAC__StreamDecoderState.
+			 */
+			class FLACPP_API State {
+			public:
+				inline State(::FLAC__StreamDecoderState state): state_(state) { }
+				inline operator ::FLAC__StreamDecoderState() const { return state_; }
+				inline const char *as_cstring() const { return ::FLAC__StreamDecoderStateString[state_]; }
+				inline const char *resolved_as_cstring(const Stream &decoder) const { return ::FLAC__stream_decoder_get_resolved_state_string(decoder.decoder_); }
+			protected:
+				::FLAC__StreamDecoderState state_;
+			};
+
+			Stream();
+			virtual ~Stream();
+
+			//@{
+			/** Call after construction to check that the object was created
+			 *  successfully.  If not, use get_state() to find out why not.
+			 */
+			virtual bool is_valid() const;
+			inline operator bool() const { return is_valid(); } ///< See is_valid()
+			//@}
+
+			virtual bool set_ogg_serial_number(long value);                        ///< See FLAC__stream_decoder_set_ogg_serial_number()
+			virtual bool set_md5_checking(bool value);                             ///< See FLAC__stream_decoder_set_md5_checking()
+			virtual bool set_metadata_respond(::FLAC__MetadataType type);          ///< See FLAC__stream_decoder_set_metadata_respond()
+			virtual bool set_metadata_respond_application(const FLAC__byte id[4]); ///< See FLAC__stream_decoder_set_metadata_respond_application()
+			virtual bool set_metadata_respond_all();                               ///< See FLAC__stream_decoder_set_metadata_respond_all()
+			virtual bool set_metadata_ignore(::FLAC__MetadataType type);           ///< See FLAC__stream_decoder_set_metadata_ignore()
+			virtual bool set_metadata_ignore_application(const FLAC__byte id[4]);  ///< See FLAC__stream_decoder_set_metadata_ignore_application()
+			virtual bool set_metadata_ignore_all();                                ///< See FLAC__stream_decoder_set_metadata_ignore_all()
+
+			/* get_state() is not virtual since we want subclasses to be able to return their own state */
+			State get_state() const;                                          ///< See FLAC__stream_decoder_get_state()
+			virtual bool get_md5_checking() const;                            ///< See FLAC__stream_decoder_get_md5_checking()
+			virtual FLAC__uint64 get_total_samples() const;                   ///< See FLAC__stream_decoder_get_total_samples()
+			virtual unsigned get_channels() const;                            ///< See FLAC__stream_decoder_get_channels()
+			virtual ::FLAC__ChannelAssignment get_channel_assignment() const; ///< See FLAC__stream_decoder_get_channel_assignment()
+			virtual unsigned get_bits_per_sample() const;                     ///< See FLAC__stream_decoder_get_bits_per_sample()
+			virtual unsigned get_sample_rate() const;                         ///< See FLAC__stream_decoder_get_sample_rate()
+			virtual unsigned get_blocksize() const;                           ///< See FLAC__stream_decoder_get_blocksize()
+			virtual bool get_decode_position(FLAC__uint64 *position) const;   ///< See FLAC__stream_decoder_get_decode_position()
+
+			virtual ::FLAC__StreamDecoderInitStatus init();      ///< See FLAC__stream_decoder_init_stream()
+			virtual ::FLAC__StreamDecoderInitStatus init_ogg();  ///< See FLAC__stream_decoder_init_ogg_stream()
+
+			virtual bool finish(); ///< See FLAC__stream_decoder_finish()
+
+			virtual bool flush(); ///< See FLAC__stream_decoder_flush()
+			virtual bool reset(); ///< See FLAC__stream_decoder_reset()
+
+			virtual bool process_single();                ///< See FLAC__stream_decoder_process_single()
+			virtual bool process_until_end_of_metadata(); ///< See FLAC__stream_decoder_process_until_end_of_metadata()
+			virtual bool process_until_end_of_stream();   ///< See FLAC__stream_decoder_process_until_end_of_stream()
+			virtual bool skip_single_frame();             ///< See FLAC__stream_decoder_skip_single_frame()
+
+			virtual bool seek_absolute(FLAC__uint64 sample); ///< See FLAC__stream_decoder_seek_absolute()
+		protected:
+			/// see FLAC__StreamDecoderReadCallback
+			virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], size_t *bytes) = 0;
+
+			/// see FLAC__StreamDecoderSeekCallback
+			virtual ::FLAC__StreamDecoderSeekStatus seek_callback(FLAC__uint64 absolute_byte_offset);
+
+			/// see FLAC__StreamDecoderTellCallback
+			virtual ::FLAC__StreamDecoderTellStatus tell_callback(FLAC__uint64 *absolute_byte_offset);
+
+			/// see FLAC__StreamDecoderLengthCallback
+			virtual ::FLAC__StreamDecoderLengthStatus length_callback(FLAC__uint64 *stream_length);
+
+			/// see FLAC__StreamDecoderEofCallback
+			virtual bool eof_callback();
+
+			/// see FLAC__StreamDecoderWriteCallback
+			virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]) = 0;
+
+			/// see FLAC__StreamDecoderMetadataCallback
+			virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata);
+
+			/// see FLAC__StreamDecoderErrorCallback
+			virtual void error_callback(::FLAC__StreamDecoderErrorStatus status) = 0;
+
+#if (defined _MSC_VER) || (defined __BORLANDC__) || (defined __GNUG__ && (__GNUG__ < 2 || (__GNUG__ == 2 && __GNUC_MINOR__ < 96))) || (defined __SUNPRO_CC)
+			// lame hack: some MSVC/GCC versions can't see a protected decoder_ from nested State::resolved_as_cstring()
+			friend State;
+#endif
+			::FLAC__StreamDecoder *decoder_;
+
+			static ::FLAC__StreamDecoderReadStatus read_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
+			static ::FLAC__StreamDecoderSeekStatus seek_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data);
+			static ::FLAC__StreamDecoderTellStatus tell_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data);
+			static ::FLAC__StreamDecoderLengthStatus length_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data);
+			static FLAC__bool eof_callback_(const ::FLAC__StreamDecoder *decoder, void *client_data);
+			static ::FLAC__StreamDecoderWriteStatus write_callback_(const ::FLAC__StreamDecoder *decoder, const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data);
+			static void metadata_callback_(const ::FLAC__StreamDecoder *decoder, const ::FLAC__StreamMetadata *metadata, void *client_data);
+			static void error_callback_(const ::FLAC__StreamDecoder *decoder, ::FLAC__StreamDecoderErrorStatus status, void *client_data);
+		private:
+			// Private and undefined so you can't use them:
+			Stream(const Stream &);
+			void operator=(const Stream &);
+		};
+
+		/** \ingroup flacpp_decoder
+		 *  \brief
+		 *  This class wraps the ::FLAC__StreamDecoder.  If you are
+		 *  not decoding from a file, you may need to use
+		 *  FLAC::Decoder::Stream.
+		 *
+		 * The usage of this class is similar to FLAC__StreamDecoder,
+		 * except instead of providing callbacks to
+		 * FLAC__stream_decoder_init*_FILE() or
+		 * FLAC__stream_decoder_init*_file(), you will inherit from this
+		 * class and override the virtual callback functions with your
+		 * own implementations, then call init() or init_ogg().  The rest
+		 * of the calls work the same as in the C layer.
+		 *
+		 * Only the write and error callbacks from FLAC::Decoder::Stream
+		 * are mandatory.  The others are optional; this class provides
+		 * full working implementations for all other callbacks and
+		 * supports seeking.
+		 */
+		class FLACPP_API File: public Stream {
+		public:
+			File();
+			virtual ~File();
+
+			virtual ::FLAC__StreamDecoderInitStatus init(FILE *file);                      ///< See FLAC__stream_decoder_init_FILE()
+			virtual ::FLAC__StreamDecoderInitStatus init(const char *filename);            ///< See FLAC__stream_decoder_init_file()
+			virtual ::FLAC__StreamDecoderInitStatus init_ogg(FILE *file);                  ///< See FLAC__stream_decoder_init_ogg_FILE()
+			virtual ::FLAC__StreamDecoderInitStatus init_ogg(const char *filename);        ///< See FLAC__stream_decoder_init_ogg_file()
+		protected:
+			// Dummy override that only satisfies the pure virtual in Stream; per the original note, the real read callback is supplied internally by the C layer
+			virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], size_t *bytes);
+		private:
+			// Private and undefined so you can't use them:
+			File(const File &);
+			void operator=(const File &);
+		};
+
+	}
+}
+
+#endif
diff --git a/FLAC/FLAC++/export.h b/FLAC/FLAC++/export.h
index ba4a4b4c88..61c9f0853e 100644
--- a/FLAC/FLAC++/export.h
+++ b/FLAC/FLAC++/export.h
@@ -1,5 +1,5 @@
 /* libFLAC++ - Free Lossless Audio Codec library
- * Copyright (C) 2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,6 +32,29 @@
 #ifndef FLACPP__EXPORT_H
 #define FLACPP__EXPORT_H
 
+/** \file include/FLAC++/export.h
+ *
+ *  \brief
+ *  This module contains #defines and symbols for exporting function
+ *  calls, and providing version information and compiled-in features.
+ *
+ *  See the \link flacpp_export export \endlink module.
+ */
+
+/** \defgroup flacpp_export FLAC++/export.h: export symbols
+ *  \ingroup flacpp
+ *
+ *  \brief
+ *  This module contains #defines and symbols for exporting function
+ *  calls, and providing version information and compiled-in features.
+ *
+ *  If you are compiling with MSVC and will link to the static library
+ *  (libFLAC++.lib) you should define FLAC__NO_DLL in your project to
+ *  make sure the symbols are exported properly.
+ *
+ * \{
+ */
+
 #if defined(FLAC__NO_DLL) || !defined(_MSC_VER)
 #define FLACPP_API
 
@@ -44,4 +67,14 @@
 
 #endif
 #endif
+
+/* These #defines will mirror the libtool-based library version number, see
+ * http://www.gnu.org/software/libtool/manual.html#Libtool-versioning
+ */
+#define FLACPP_API_VERSION_CURRENT 8
+#define FLACPP_API_VERSION_REVISION 0
+#define FLACPP_API_VERSION_AGE 2
+
+/* \} */
+
 #endif
diff --git a/FLAC/FLAC.dsp b/FLAC/FLAC.dsp
deleted file mode 100644
index d0363b456f..0000000000
--- a/FLAC/FLAC.dsp
+++ /dev/null
@@ -1,327 +0,0 @@
-# Microsoft Developer Studio Project File - Name="FLAC" - Package Owner=<4>
-# Microsoft Developer Studio Generated Build File, Format Version 6.00
-# ** DO NOT EDIT **
-
-# TARGTYPE "Win32 (x86) Static Library" 0x0104
-
-CFG=FLAC - Win32 Debug
-!MESSAGE This is not a valid makefile. To build this project using NMAKE,
-!MESSAGE use the Export Makefile command and run
-!MESSAGE 
-!MESSAGE NMAKE /f "FLAC.mak".
-!MESSAGE 
-!MESSAGE You can specify a configuration when running NMAKE
-!MESSAGE by defining the macro CFG on the command line. For example:
-!MESSAGE 
-!MESSAGE NMAKE /f "FLAC.mak" CFG="FLAC - Win32 Debug"
-!MESSAGE 
-!MESSAGE Possible choices for configuration are:
-!MESSAGE 
-!MESSAGE "FLAC - Win32 Debug" (based on "Win32 (x86) Static Library")
-!MESSAGE "FLAC - Win32 Release" (based on "Win32 (x86) Static Library")
-!MESSAGE 
-
-# Begin Project
-# PROP AllowPerConfigDependencies 0
-# PROP Scc_ProjName ""
-# PROP Scc_LocalPath ""
-CPP=cl.exe
-MTL=midl.exe
-RSC=rc.exe
-
-!IF  "$(CFG)" == "FLAC - Win32 Debug"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 1
-# PROP BASE Output_Dir "Debug"
-# PROP BASE Intermediate_Dir "Debug"
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 1
-# PROP Output_Dir "Debug"
-# PROP Intermediate_Dir "Debug"
-# PROP Target_Dir ""
-# ADD BASE CPP /nologo /MTd /ZI /W3 /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /D "FLAC__CPU_IA32" /D "FLAC__HAS_NASM" /D "FLAC__SSE_OS" /D "FLAC__USE_3DNOW" /D "_MBCS" /Gm PRECOMP_VC7_TOBEREMOVED /GZ /c /GX 
-# ADD CPP /nologo /MTd /ZI /W3 /Od /D "WIN32" /D "_DEBUG" /D "_LIB" /D "FLAC__CPU_IA32" /D "FLAC__HAS_NASM" /D "FLAC__SSE_OS" /D "FLAC__USE_3DNOW" /D "_MBCS" /Gm PRECOMP_VC7_TOBEREMOVED /GZ /c /GX 
-# ADD BASE MTL /nologo /win32 
-# ADD MTL /nologo /win32 
-# ADD BASE RSC /l 1033 
-# ADD RSC /l 1033 
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo 
-# ADD BSC32 /nologo 
-LIB32=link.exe -lib
-# ADD BASE LIB32 /nologo /out:"Debug\FLAC.lib" 
-# ADD LIB32 /nologo /out:"Debug\FLAC.lib" 
-
-!ELSEIF  "$(CFG)" == "FLAC - Win32 Release"
-
-# PROP BASE Use_MFC 0
-# PROP BASE Use_Debug_Libraries 0
-# PROP BASE Output_Dir "Release"
-# PROP BASE Intermediate_Dir "Release"
-# PROP BASE Target_Dir ""
-# PROP Use_MFC 0
-# PROP Use_Debug_Libraries 0
-# PROP Output_Dir "Release"
-# PROP Intermediate_Dir "Release"
-# PROP Target_Dir ""
-# ADD BASE CPP /nologo /MT /Zi /W3 /O2 /Ob1 /Oy /D "WIN32" /D "NDEBUG" /D "_LIB" /D "FLAC__CPU_IA32" /D "FLAC__HAS_NASM" /D "FLAC__SSE_OS" /D "FLAC__USE_3DNOW" /D "_MBCS" /GF /Gy PRECOMP_VC7_TOBEREMOVED /c /GX 
-# ADD CPP /nologo /MT /Zi /W3 /O2 /Ob1 /Oy /D "WIN32" /D "NDEBUG" /D "_LIB" /D "FLAC__CPU_IA32" /D "FLAC__HAS_NASM" /D "FLAC__SSE_OS" /D "FLAC__USE_3DNOW" /D "_MBCS" /GF /Gy PRECOMP_VC7_TOBEREMOVED /c /GX 
-# ADD BASE MTL /nologo /win32 
-# ADD MTL /nologo /win32 
-# ADD BASE RSC /l 1033 
-# ADD RSC /l 1033 
-BSC32=bscmake.exe
-# ADD BASE BSC32 /nologo 
-# ADD BSC32 /nologo 
-LIB32=link.exe -lib
-# ADD BASE LIB32 /nologo /out:"Release\FLAC.lib" 
-# ADD LIB32 /nologo /out:"Release\FLAC.lib" 
-
-!ENDIF
-
-# Begin Target
-
-# Name "FLAC - Win32 Debug"
-# Name "FLAC - Win32 Release"
-# Begin Group "Source Files"
-
-# PROP Default_Filter "cpp;c;cxx;def;odl;idl;hpj;bat;asm"
-# Begin Source File
-
-SOURCE=bitbuffer.c
-# End Source File
-# Begin Source File
-
-SOURCE=bitmath.c
-# End Source File
-# Begin Source File
-
-SOURCE=cpu.c
-# End Source File
-# Begin Source File
-
-SOURCE=crc.c
-# End Source File
-# Begin Source File
-
-SOURCE=fixed.c
-# End Source File
-# Begin Source File
-
-SOURCE=format.c
-# End Source File
-# Begin Source File
-
-SOURCE=lpc.c
-# End Source File
-# Begin Source File
-
-SOURCE=stream_decoder.c
-# End Source File
-# Begin Source File
-
-SOURCE=stream_decoder_pp.cpp
-# End Source File
-# End Group
-# Begin Group "Header Files"
-
-# PROP Default_Filter "h;hpp;hxx;hm;inl;inc"
-# Begin Group "Protected"
-
-# PROP Default_Filter ""
-# Begin Source File
-
-SOURCE=protected\stream_decoder.h
-# End Source File
-# End Group
-# Begin Group "Private"
-
-# PROP Default_Filter ""
-# Begin Source File
-
-SOURCE=private\bitbuffer.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\bitmath.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\cpu.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\crc.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\fixed.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\format.h
-# End Source File
-# Begin Source File
-
-SOURCE=private\lpc.h
-# End Source File
-# End Group
-# Begin Group "FLAC"
-
-# PROP Default_Filter ""
-# Begin Source File
-
-SOURCE=FLAC\assert.h
-# End Source File
-# Begin Source File
-
-SOURCE=FLAC\export.h
-# End Source File
-# Begin Source File
-
-SOURCE=FLAC\format.h
-# End Source File
-# Begin Source File
-
-SOURCE=FLAC\ordinals.h
-# End Source File
-# Begin Source File
-
-SOURCE=FLAC\stream_decoder.h
-# End Source File
-# End Group
-# Begin Group "FLAC++"
-
-# PROP Default_Filter ""
-# Begin Source File
-
-SOURCE=FLAC++\decoder.h
-# End Source File
-# Begin Source File
-
-SOURCE=FLAC++\export.h
-# End Source File
-# End Group
-# End Group
-# Begin Group "IA32 Files"
-
-# PROP Default_Filter ""
-# Begin Source File
-
-SOURCE=ia32\cpu_asm.nasm
-
-!IF  "$(CFG)" == "FLAC - Win32 Debug"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ELSEIF  "$(CFG)" == "FLAC - Win32 Release"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ENDIF
-
-# End Source File
-# Begin Source File
-
-SOURCE=ia32\fixed_asm.nasm
-
-!IF  "$(CFG)" == "FLAC - Win32 Debug"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ELSEIF  "$(CFG)" == "FLAC - Win32 Release"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ENDIF
-
-# End Source File
-# Begin Source File
-
-SOURCE=ia32\lpc_asm.nasm
-
-!IF  "$(CFG)" == "FLAC - Win32 Debug"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ELSEIF  "$(CFG)" == "FLAC - Win32 Release"
-
-# PROP Ignore_Default_Tool 1
-# Begin Custom Build - Assembling $(InputPath)...
-SOURCE="$(InputPath)"
-
-BuildCmds= \
-	nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath) \
-
-
-"$(IntDir)\$(InputName).obj" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
-   $(BuildCmds)
-# End Custom Build
-
-!ENDIF
-
-# End Source File
-# Begin Source File
-
-SOURCE=ia32\nasm.h
-# End Source File
-# End Group
-# Begin Source File
-
-SOURCE=ReadMe.txt
-# End Source File
-# End Target
-# End Project
-
diff --git a/FLAC/FLAC.vcproj b/FLAC/FLAC.vcproj
index 5c7a8edbcd..972d8070c7 100644
--- a/FLAC/FLAC.vcproj
+++ b/FLAC/FLAC.vcproj
@@ -54,6 +54,72 @@
 				WarningLevel="3"
 				Detect64BitPortabilityProblems="true"
 				DebugInformationFormat="4"
+				DisableSpecificWarnings="4996"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+				OutputFile="$(OutDir)/FLAC.lib"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="4"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;FLAC__NO_DLL"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"
@@ -120,71 +186,7 @@
 				WarningLevel="3"
 				Detect64BitPortabilityProblems="true"
 				DebugInformationFormat="3"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLibrarianTool"
-				OutputFile="$(OutDir)/FLAC.lib"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
-		<Configuration
-			Name="Debug|x64"
-			OutputDirectory="$(PlatformName)\$(ConfigurationName)"
-			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
-			ConfigurationType="4"
-			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
-			CharacterSet="2"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-				TargetEnvironment="3"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="0"
-				PreprocessorDefinitions="WIN32;_DEBUG;_LIB;FLAC__NO_DLL"
-				MinimalRebuild="true"
-				BasicRuntimeChecks="3"
-				RuntimeLibrary="1"
-				UsePrecompiledHeader="0"
-				WarningLevel="3"
-				Detect64BitPortabilityProblems="true"
-				DebugInformationFormat="3"
+				DisableSpecificWarnings="4996"
 			/>
 			<Tool
 				Name="VCManagedResourceCompilerTool"
@@ -291,11 +293,11 @@
 			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm"
 			>
 			<File
-				RelativePath="bitbuffer.c"
+				RelativePath="bitmath.c"
 				>
 			</File>
 			<File
-				RelativePath="bitmath.c"
+				RelativePath=".\bitreader.c"
 				>
 			</File>
 			<File
@@ -318,6 +320,10 @@
 				RelativePath="lpc.c"
 				>
 			</File>
+			<File
+				RelativePath=".\md5.c"
+				>
+			</File>
 			<File
 				RelativePath=".\memory.c"
 				>
@@ -347,11 +353,11 @@
 				Name="Private"
 				>
 				<File
-					RelativePath="private\bitbuffer.h"
+					RelativePath="private\bitmath.h"
 					>
 				</File>
 				<File
-					RelativePath="private\bitmath.h"
+					RelativePath=".\private\bitreader.h"
 					>
 				</File>
 				<File
@@ -378,6 +384,10 @@
 					RelativePath="private\lpc.h"
 					>
 				</File>
+				<File
+					RelativePath=".\private\md5.h"
+					>
+				</File>
 				<File
 					RelativePath=".\private\memory.h"
 					>
@@ -419,12 +429,20 @@
 					>
 				</File>
 			</Filter>
+			<Filter
+				Name="Share"
+				>
+				<File
+					RelativePath=".\share\alloc.h"
+					>
+				</File>
+			</Filter>
 		</Filter>
 		<Filter
 			Name="IA32 Files"
 			>
 			<File
-				RelativePath="ia32\cpu_asm.nasm"
+				RelativePath=".\ia32\bitreader_asm.nasm"
 				>
 				<FileConfiguration
 					Name="Debug|Win32"
@@ -432,17 +450,7 @@
 					<Tool
 						Name="VCCustomBuildTool"
 						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -457,6 +465,62 @@
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath)&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="ia32\cpu_asm.nasm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath)&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
 					ExcludedFromBuild="true"
@@ -478,17 +542,7 @@
 					<Tool
 						Name="VCCustomBuildTool"
 						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -503,6 +557,16 @@
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
 					ExcludedFromBuild="true"
@@ -524,17 +588,7 @@
 					<Tool
 						Name="VCCustomBuildTool"
 						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
-						Outputs="$(IntDir)\$(InputName).obj"
-					/>
-				</FileConfiguration>
-				<FileConfiguration
-					Name="Release|Win32"
-					>
-					<Tool
-						Name="VCCustomBuildTool"
-						Description="Assembling $(InputPath)..."
-						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;"
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
@@ -549,6 +603,16 @@
 						Outputs="$(IntDir)\$(InputName).obj"
 					/>
 				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
 				<FileConfiguration
 					Name="Release|x64"
 					ExcludedFromBuild="true"
@@ -565,7 +629,57 @@
 				RelativePath="ia32\nasm.h"
 				>
 			</File>
+			<File
+				RelativePath=".\ia32\stream_encoder_asm.nasm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath)&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o &quot;$(IntDir)\$(InputName).obj&quot; -d OBJ_FORMAT_win32 -f win32 &quot;$(InputPath)&quot;&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						Description="Assembling $(InputPath)..."
+						CommandLine="nasmw -o $(IntDir)\$(InputName).obj -d OBJ_FORMAT_win32 -f win32 $(InputPath)&#x0D;&#x0A;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
 		</Filter>
+		<File
+			RelativePath=".\Makefile.mgw"
+			>
+		</File>
 		<File
 			RelativePath="ReadMe.txt"
 			>
diff --git a/FLAC/FLAC/assert.h b/FLAC/FLAC/assert.h
index cd5bd8e38e..5c2d5048b8 100644
--- a/FLAC/FLAC/assert.h
+++ b/FLAC/FLAC/assert.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/FLAC/export.h b/FLAC/FLAC/export.h
index 740f6034d9..5a5654c0fd 100644
--- a/FLAC/FLAC/export.h
+++ b/FLAC/FLAC/export.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,6 +32,29 @@
 #ifndef FLAC__EXPORT_H
 #define FLAC__EXPORT_H
 
+/** \file include/FLAC/export.h
+ *
+ *  \brief
+ *  This module contains #defines and symbols for exporting function
+ *  calls, and providing version information and compiled-in features.
+ *
+ *  See the \link flac_export export \endlink module.
+ */
+
+/** \defgroup flac_export FLAC/export.h: export symbols
+ *  \ingroup flac
+ *
+ *  \brief
+ *  This module contains #defines and symbols for exporting function
+ *  calls, and providing version information and compiled-in features.
+ *
+ *  If you are compiling with MSVC and will link to the static library
+ *  (libFLAC.lib) you should define FLAC__NO_DLL in your project to
+ *  make sure the symbols are exported properly.
+ *
+ * \{
+ */
+
 #if defined(FLAC__NO_DLL) || !defined(_MSC_VER)
 #define FLAC_API
 
@@ -44,4 +67,25 @@
 
 #endif
 #endif
+
+/** These #defines will mirror the libtool-based library version number, see
+ * http://www.gnu.org/software/libtool/manual.html#Libtool-versioning
+ */
+#define FLAC_API_VERSION_CURRENT 10
+#define FLAC_API_VERSION_REVISION 0 /**< see above */
+#define FLAC_API_VERSION_AGE 2 /**< see above */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \c 1 if the library has been compiled with support for Ogg FLAC, else \c 0. */
+extern FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC;
+
+#ifdef __cplusplus
+}
+#endif
+
+/** \} */
+
 #endif
diff --git a/FLAC/FLAC/format.h b/FLAC/FLAC/format.h
index 35fa76d1d7..75c06c1bb0 100644
--- a/FLAC/FLAC/format.h
+++ b/FLAC/FLAC/format.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -97,6 +97,10 @@ extern "C" {
 /** The maximum block size, in samples, permitted by the format. */
 #define FLAC__MAX_BLOCK_SIZE (65535u)
 
+/** The maximum block size, in samples, permitted by the FLAC subset for
+ *  sample rates up to 48kHz. */
+#define FLAC__SUBSET_MAX_BLOCK_SIZE_48000HZ (4608u)
+
 /** The maximum number of channels permitted by the format. */
 #define FLAC__MAX_CHANNELS (8u)
 
@@ -125,6 +129,10 @@ extern "C" {
 /** The maximum LPC order permitted by the format. */
 #define FLAC__MAX_LPC_ORDER (32u)
 
+/** The maximum LPC order permitted by the FLAC subset for sample rates
+ *  up to 48kHz. */
+#define FLAC__SUBSET_MAX_LPC_ORDER_48000HZ (12u)
+
 /** The minimum quantized linear predictor coefficient precision
  *  permitted by the format.
  */
@@ -183,9 +191,13 @@ extern FLAC_API const unsigned FLAC__STREAM_SYNC_LEN; /* = 32 bits */
 
 /** An enumeration of the available entropy coding methods. */
 typedef enum {
-	FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE = 0
+	FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE = 0,
 	/**< Residual is coded by partitioning into contexts, each with it's own
-	 * Rice parameter. */
+	 * 4-bit Rice parameter. */
+
+	FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2 = 1
+	/**< Residual is coded by partitioning into contexts, each with its own
+	 * 5-bit Rice parameter. */
 } FLAC__EntropyCodingMethodType;
 
 /** Maps a FLAC__EntropyCodingMethodType to a C string.
@@ -204,7 +216,9 @@ typedef struct {
 	/**< The Rice parameters for each context. */
 
 	unsigned *raw_bits;
-	/**< Widths for escape-coded partitions. */
+	/**< Widths for escape-coded partitions.  Will be non-zero for escaped
+	 * partitions and zero for unescaped partitions.
+	 */
 
 	unsigned capacity_by_order;
 	/**< The capacity of the \a parameters and \a raw_bits arrays
@@ -227,10 +241,13 @@ typedef struct {
 
 extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN; /**< == 4 (bits) */
 extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN; /**< == 4 (bits) */
+extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN; /**< == 5 (bits) */
 extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN; /**< == 5 (bits) */
 
 extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 /**< == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN)-1 */
+extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER;
+/**< == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN)-1 */
 
 /** Header for the entropy coding method.  (c.f. <A HREF="../format.html#residual">format specification</A>)
  */
@@ -334,14 +351,21 @@ typedef struct {
 	unsigned wasted_bits;
 } FLAC__Subframe;
 
-extern FLAC_API const unsigned FLAC__SUBFRAME_ZERO_PAD_LEN; /**< == 1 (bit) */
+/** == 1 (bit)
+ *
+ * This used to be a zero-padding bit (hence the name
+ * FLAC__SUBFRAME_ZERO_PAD_LEN) but is now a reserved bit.  It still has a
+ * mandatory value of \c 0 but in the future may take on the value \c 0 or \c 1
+ * to mean something else.
+ */
+extern FLAC_API const unsigned FLAC__SUBFRAME_ZERO_PAD_LEN;
 extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LEN; /**< == 6 (bits) */
 extern FLAC_API const unsigned FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN; /**< == 1 (bit) */
 
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK; /* = 0x00 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK; /* = 0x02 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK; /* = 0x10 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK; /* = 0x40 */
+extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK; /**< = 0x00 */
+extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK; /**< = 0x02 */
+extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK; /**< = 0x10 */
+extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK; /**< = 0x40 */
 
 /*****************************************************************************/
 
@@ -400,7 +424,9 @@ typedef struct {
 	/**< The sample resolution. */
 
 	FLAC__FrameNumberType number_type;
-	/**< The numbering scheme used for the frame. */
+	/**< The numbering scheme used for the frame.  As a convenience, the
+	 * decoder will always convert a frame number to a sample number because
+	 * the rules are complex. */
 
 	union {
 		FLAC__uint32 frame_number;
@@ -418,7 +444,8 @@ typedef struct {
 
 extern FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC; /**< == 0x3ffe; the frame header sync code */
 extern FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC_LEN; /**< == 14 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN; /**< == 2 (bits) */
+extern FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN; /**< == 1 (bit) */
+extern FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN; /**< == 1 (bit) */
 extern FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCK_SIZE_LEN; /**< == 4 (bits) */
 extern FLAC_API const unsigned FLAC__FRAME_HEADER_SAMPLE_RATE_LEN; /**< == 4 (bits) */
 extern FLAC_API const unsigned FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN; /**< == 4 (bits) */
@@ -478,7 +505,10 @@ typedef enum {
 	FLAC__METADATA_TYPE_CUESHEET = 5,
 	/**< <A HREF="../format.html#metadata_block_cuesheet">CUESHEET</A> block */
 
-	FLAC__METADATA_TYPE_UNDEFINED = 6
+	FLAC__METADATA_TYPE_PICTURE = 6,
+	/**< <A HREF="../format.html#metadata_block_picture">PICTURE</A> block */
+
+	FLAC__METADATA_TYPE_UNDEFINED = 7
 	/**< marker to denote beginning of undefined type range; this number will increase as new metadata types are added */
 
 } FLAC__MetadataType;
@@ -638,7 +668,7 @@ typedef struct {
 	/**< The track number. */
 
 	char isrc[13];
-	/**< Track ISRC.  This is a 12-digit alphanumeric code plus a trailing '\0' */
+	/**< Track ISRC.  This is a 12-digit alphanumeric code plus a trailing \c NUL byte */
 
 	unsigned type:1;
 	/**< The track type: 0 for audio, 1 for non-audio. */
@@ -678,7 +708,7 @@ typedef struct {
 	/**< The number of lead-in samples. */
 
 	FLAC__bool is_cd;
-	/**< \c true if CUESHEET corresponds to a Compact Disc, else \c false */
+	/**< \c true if CUESHEET corresponds to a Compact Disc, else \c false. */
 
 	unsigned num_tracks;
 	/**< The number of tracks. */
@@ -695,6 +725,98 @@ extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN; /**<
 extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN; /**< == 8 (bits) */
 
 
+/** An enumeration of the PICTURE types (see FLAC__StreamMetadata_Picture and the ID3v2.4 APIC frame). */
+typedef enum {
+	FLAC__STREAM_METADATA_PICTURE_TYPE_OTHER = 0, /**< Other */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_FILE_ICON_STANDARD = 1, /**< 32x32 pixels 'file icon' (PNG only) */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_FILE_ICON = 2, /**< Other file icon */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_FRONT_COVER = 3, /**< Cover (front) */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_BACK_COVER = 4, /**< Cover (back) */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_LEAFLET_PAGE = 5, /**< Leaflet page */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_MEDIA = 6, /**< Media (e.g. label side of CD) */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_LEAD_ARTIST = 7, /**< Lead artist/lead performer/soloist */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_ARTIST = 8, /**< Artist/performer */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_CONDUCTOR = 9, /**< Conductor */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_BAND = 10, /**< Band/Orchestra */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_COMPOSER = 11, /**< Composer */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_LYRICIST = 12, /**< Lyricist/text writer */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_RECORDING_LOCATION = 13, /**< Recording Location */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_DURING_RECORDING = 14, /**< During recording */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_DURING_PERFORMANCE = 15, /**< During performance */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_VIDEO_SCREEN_CAPTURE = 16, /**< Movie/video screen capture */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_FISH = 17, /**< A bright coloured fish */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_ILLUSTRATION = 18, /**< Illustration */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_BAND_LOGOTYPE = 19, /**< Band/artist logotype */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_PUBLISHER_LOGOTYPE = 20, /**< Publisher/Studio logotype */
+	FLAC__STREAM_METADATA_PICTURE_TYPE_UNDEFINED
+} FLAC__StreamMetadata_Picture_Type;
+
+/** Maps a FLAC__StreamMetadata_Picture_Type to a C string.
+ *
+ *  Using a FLAC__StreamMetadata_Picture_Type as the index to this array
+ *  will give the string equivalent.  The contents should not be
+ *  modified.
+ */
+extern FLAC_API const char * const FLAC__StreamMetadata_Picture_TypeString[];
+
+/** FLAC PICTURE structure.  (See the
+ * <A HREF="../format.html#metadata_block_picture">format specification</A>
+ * for the full description of each field.)
+ */
+typedef struct {
+	FLAC__StreamMetadata_Picture_Type type;
+	/**< The kind of picture stored. */
+
+	char *mime_type;
+	/**< Picture data's MIME type, in ASCII printable characters
+	 * 0x20-0x7e, NUL terminated.  For best compatibility with players,
+	 * use picture data of MIME type \c image/jpeg or \c image/png.  A
+	 * MIME type of '-->' is also allowed, in which case the picture
+	 * data should be a complete URL.  In file storage, the MIME type is
+	 * stored as a 32-bit length followed by the ASCII string with no NUL
+	 * terminator, but is converted to a plain C string in this structure
+	 * for convenience.
+	 */
+
+	FLAC__byte *description;
+	/**< Picture's description in UTF-8, NUL terminated.  In file storage,
+	 * the description is stored as a 32-bit length followed by the UTF-8
+	 * string with no NUL terminator, but is converted to a plain C string
+	 * in this structure for convenience.
+	 */
+
+	FLAC__uint32 width;
+	/**< Picture's width in pixels. */
+
+	FLAC__uint32 height;
+	/**< Picture's height in pixels. */
+
+	FLAC__uint32 depth;
+	/**< Picture's color depth in bits-per-pixel. */
+
+	FLAC__uint32 colors;
+	/**< For indexed palettes (like GIF), picture's number of colors (the
+	 * number of palette entries), or \c 0 for non-indexed (i.e. 2^depth).
+	 */
+
+	FLAC__uint32 data_length;
+	/**< Length of binary picture data in bytes. */
+
+	FLAC__byte *data;
+	/**< Binary picture data. */
+
+} FLAC__StreamMetadata_Picture;
+
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_TYPE_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_COLORS_LEN; /**< == 32 (bits) */
+extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN; /**< == 32 (bits) */
+
+
 /** Structure that is used when a metadata block of unknown type is loaded.
  *  The contents are opaque.  The structure is used only internally to
  *  correctly handle unknown metadata.
@@ -725,6 +847,7 @@ typedef struct {
 		FLAC__StreamMetadata_SeekTable seek_table;
 		FLAC__StreamMetadata_VorbisComment vorbis_comment;
 		FLAC__StreamMetadata_CueSheet cue_sheet;
+		FLAC__StreamMetadata_Picture picture;
 		FLAC__StreamMetadata_Unknown unknown;
 	} data;
 	/**< Polymorphic block data; use the \a type value to determine which
@@ -747,8 +870,7 @@ extern FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN; /**< == 24 (bit
  *
  *****************************************************************************/
 
-/** Tests that a sample rate is valid for FLAC.  Since the rules for valid
- *  sample rates are slightly complex, they are encapsulated in this function.
+/** Tests that a sample rate is valid for FLAC.
  *
  * \param sample_rate  The sample rate to test for compliance.
  * \retval FLAC__bool
@@ -757,6 +879,17 @@ extern FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN; /**< == 24 (bit
  */
 FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate);
 
+/** Tests that a sample rate is valid for the FLAC subset.  The subset rules
+ *  for valid sample rates are slightly more complex since the rate has to
+ *  be expressible completely in the frame header.
+ *
+ * \param sample_rate  The sample rate to test for compliance.
+ * \retval FLAC__bool
+ *    \c true if the given sample rate conforms to the specification for the
+ *    subset, else \c false.
+ */
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(unsigned sample_rate);
+
 /** Check a Vorbis comment entry name to see if it conforms to the Vorbis
  *  comment specification.
  *
@@ -795,7 +928,8 @@ FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__
  *  FLAC__format_vorbiscomment_entry_name_is_legal() and
  *  FLAC__format_vorbiscomment_entry_value_is_legal() respectively.
  *
- * \param value      A string to be checked.
+ * \param entry      An entry to be checked.
+ * \param length     The length of \a entry in bytes.
  * \assert
  *    \code value != NULL \endcode
  * \retval FLAC__bool
@@ -803,7 +937,6 @@ FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__
  */
 FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *entry, unsigned length);
 
-/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 /** Check a seek table to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the
  *  seek table.
@@ -816,7 +949,6 @@ FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *
  */
 FLAC_API FLAC__bool FLAC__format_seektable_is_legal(const FLAC__StreamMetadata_SeekTable *seek_table);
 
-/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 /** Sort a seek table's seek points according to the format specification.
  *  This includes a "unique-ification" step to remove duplicates, i.e.
  *  seek points with identical \a sample_number values.  Duplicate seek
@@ -831,7 +963,6 @@ FLAC_API FLAC__bool FLAC__format_seektable_is_legal(const FLAC__StreamMetadata_S
  */
 FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *seek_table);
 
-/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 /** Check a cue sheet to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the
  *  cue sheet.
@@ -852,6 +983,24 @@ FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *se
  */
 FLAC_API FLAC__bool FLAC__format_cuesheet_is_legal(const FLAC__StreamMetadata_CueSheet *cue_sheet, FLAC__bool check_cd_da_subset, const char **violation);
 
+/** Check picture data to see if it conforms to the FLAC specification.
+ *  See the format specification for limits on the contents of the
+ *  PICTURE block.
+ *
+ * \param picture    A pointer to existing picture data to be checked.
+ * \param violation  Address of a pointer to a string.  If there is a
+ *                   violation, a pointer to a string explanation of the
+ *                   violation will be returned here. \a violation may be
+ *                   \c NULL if you don't need the returned string.  Do not
+ *                   free the returned string; it will always point to static
+ *                   data.
+ * \assert
+ *    \code picture != NULL \endcode
+ * \retval FLAC__bool
+ *    \c false if picture data is illegal, else \c true.
+ */
+FLAC_API FLAC__bool FLAC__format_picture_is_legal(const FLAC__StreamMetadata_Picture *picture, const char **violation);
+
 /* \} */
 
 #ifdef __cplusplus
diff --git a/FLAC/FLAC/ordinals.h b/FLAC/FLAC/ordinals.h
index 6160dcd72f..b3ea5928ce 100644
--- a/FLAC/FLAC/ordinals.h
+++ b/FLAC/FLAC/ordinals.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,14 +32,14 @@
 #ifndef FLAC__ORDINALS_H
 #define FLAC__ORDINALS_H
 
-#if !(defined(_MSC_VER) || defined(__EMX__))
+#if !(defined(_MSC_VER) || defined(__BORLANDC__) || defined(__EMX__))
 #include <inttypes.h>
 #endif
 
 typedef signed char FLAC__int8;
 typedef unsigned char FLAC__uint8;
 
-#if defined _MSC_VER
+#if defined(_MSC_VER) || defined(__BORLANDC__)
 typedef __int16 FLAC__int16;
 typedef __int32 FLAC__int32;
 typedef __int64 FLAC__int64;
diff --git a/FLAC/FLAC/stream_decoder.h b/FLAC/FLAC/stream_decoder.h
index 5b8d13d38e..5818fdd334 100644
--- a/FLAC/FLAC/stream_decoder.h
+++ b/FLAC/FLAC/stream_decoder.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
 #ifndef FLAC__STREAM_DECODER_H
 #define FLAC__STREAM_DECODER_H
 
+#include <stdio.h> /* for FILE */
 #include "export.h"
 #include "format.h"
 
@@ -50,28 +51,22 @@ extern "C" {
  *  \link flac_stream_decoder stream decoder \endlink module.
  */
 
-/** \defgroup flac_decoder FLAC/ *_decoder.h: decoder interfaces
+/** \defgroup flac_decoder FLAC/ \*_decoder.h: decoder interfaces
  *  \ingroup flac
  *
  *  \brief
- *  This module describes the three decoder layers provided by libFLAC.
+ *  This module describes the decoder layers provided by libFLAC.
  *
- * For decoding FLAC streams, libFLAC provides three layers of access.  The
- * lowest layer is non-seekable stream-level decoding, the next is seekable
- * stream-level decoding, and the highest layer is file-level decoding.  The
- * interfaces are described in the \link flac_stream_decoder stream decoder
- * \endlink, \link flac_seekable_stream_decoder seekable stream decoder
- * \endlink, and \link flac_file_decoder file decoder \endlink modules
- * respectively.  Typically you will choose the highest layer that your input
- * source will support.
- *
- * The stream decoder relies on callbacks for all input and output and has no
- * provisions for seeking.  The seekable stream decoder wraps the stream
- * decoder and exposes functions for seeking.  However, you must provide
- * extra callbacks for seek-related operations on your stream, like seek and
- * tell.  The file decoder wraps the seekable stream decoder and supplies
- * most of the callbacks internally, simplifying the processing of standard
- * files.
+ * The stream decoder can be used to decode complete streams either from
+ * the client via callbacks, or directly from a file, depending on how
+ * it is initialized.  When decoding via callbacks, the client provides
+ * callbacks for reading FLAC data and writing decoded samples, and
+ * handling metadata and errors.  If the client also supplies seek-related
+ * callbacks, the decoder function for sample-accurate seeking within the
+ * FLAC input is also available.  When decoding from a file, the client
+ * needs only supply a filename or open \c FILE* and write/metadata/error
+ * callbacks; the rest of the callbacks are supplied internally.  For more
+ * info see the \link flac_stream_decoder stream decoder \endlink module.
  */
 
 /** \defgroup flac_stream_decoder FLAC/stream_decoder.h: stream decoder interface
@@ -81,14 +76,20 @@ extern "C" {
  *  This module contains the functions which implement the stream
  *  decoder.
  *
+ * The stream decoder can decode native FLAC, and optionally Ogg FLAC
+ * (check FLAC_API_SUPPORTS_OGG_FLAC) streams and files.
+ *
  * The basic usage of this decoder is as follows:
  * - The program creates an instance of a decoder using
  *   FLAC__stream_decoder_new().
- * - The program overrides the default settings and sets callbacks for
- *   reading, writing, error reporting, and metadata reporting using
+ * - The program overrides the default settings using
  *   FLAC__stream_decoder_set_*() functions.
  * - The program initializes the instance to validate the settings and
- *   prepare for decoding using FLAC__stream_decoder_init().
+ *   prepare for decoding using
+ *   - FLAC__stream_decoder_init_stream() or FLAC__stream_decoder_init_FILE()
+ *     or FLAC__stream_decoder_init_file() for native FLAC,
+ *   - FLAC__stream_decoder_init_ogg_stream() or FLAC__stream_decoder_init_ogg_FILE()
+ *     or FLAC__stream_decoder_init_ogg_file() for Ogg FLAC.
  * - The program calls the FLAC__stream_decoder_process_*() functions
  *   to decode data, which subsequently calls the callbacks.
  * - The program finishes the decoding with FLAC__stream_decoder_finish(),
@@ -99,33 +100,25 @@ extern "C" {
  *
  * In more detail, the program will create a new instance by calling
  * FLAC__stream_decoder_new(), then call FLAC__stream_decoder_set_*()
- * functions to set the callbacks and client data, and call
- * FLAC__stream_decoder_init().  The required callbacks are:
+ * functions to override the default decoder options, and call
+ * one of the FLAC__stream_decoder_init_*() functions.
  *
- * - Read callback - This function will be called when the decoder needs
- *   more input data.  The address of the buffer to be filled is supplied,
- *   along with the number of bytes the buffer can hold.  The callback may
- *   choose to supply less data and modify the byte count but must be careful
- *   not to overflow the buffer.  The callback then returns a status code
- *   chosen from FLAC__StreamDecoderReadStatus.
- * - Write callback - This function will be called when the decoder has
- *   decoded a single frame of data.  The decoder will pass the frame
- *   metadata as well as an array of pointers (one for each channel)
- *   pointing to the decoded audio.
- * - Metadata callback - This function will be called when the decoder has
- *   decoded a metadata block.  In a valid FLAC file there will always be
- *   one STREAMINFO block, followed by zero or more other metadata
- *   blocks.  These will be supplied by the decoder in the same order as
- *   they appear in the stream and always before the first audio frame
- *   (i.e. write callback).  The metadata block that is passed in must not
- *   be modified, and it doesn't live beyond the callback, so you should
- *   make a copy of it with FLAC__metadata_object_clone() if you will need
- *   it elsewhere.  Since metadata blocks can potentially be large, by
- *   default the decoder only calls the metadata callback for the STREAMINFO
- *   block; you can instruct the decoder to pass or filter other blocks with
- *   FLAC__stream_decoder_set_metadata_*() calls.
- * - Error callback - This function will be called whenever an error occurs
- *   during decoding.
+ * There are three initialization functions for native FLAC, one for
+ * setting up the decoder to decode FLAC data from the client via
+ * callbacks, and two for decoding directly from a FLAC file.
+ *
+ * For decoding via callbacks, use FLAC__stream_decoder_init_stream().
+ * You must also supply several callbacks for handling I/O.  Some (like
+ * seeking) are optional, depending on the capabilities of the input.
+ *
+ * For decoding directly from a file, use FLAC__stream_decoder_init_FILE()
+ * or FLAC__stream_decoder_init_file().  Then you need only supply an open
+ * \c FILE* or filename and fewer callbacks; the decoder will handle
+ * the other callbacks internally.
+ *
+ * There are three similarly-named init functions for decoding from Ogg
+ * FLAC streams.  Check \c FLAC_API_SUPPORTS_OGG_FLAC to find out if the
+ * library has been built with Ogg support.
  *
  * Once the decoder is initialized, your program will call one of several
  * functions to start the decoding process:
@@ -136,12 +129,12 @@ extern "C" {
  *   loses sync it will return with only the error callback being called.
  * - FLAC__stream_decoder_process_until_end_of_metadata() - Tells the decoder
  *   to process the stream from the current location and stop upon reaching
- *   the first audio frame.  The user will get one metadata, write, or error
+ *   the first audio frame.  The client will get one metadata, write, or error
  *   callback per metadata block, audio frame, or sync error, respectively.
  * - FLAC__stream_decoder_process_until_end_of_stream() - Tells the decoder
  *   to process the stream from the current location until the read callback
  *   returns FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM or
- *   FLAC__STREAM_DECODER_READ_STATUS_ABORT.  The user will get one metadata,
+ *   FLAC__STREAM_DECODER_READ_STATUS_ABORT.  The client will get one metadata,
  *   write, or error callback per metadata block, audio frame, or sync error,
  *   respectively.
  *
@@ -151,11 +144,23 @@ extern "C" {
  * instance may be deleted with FLAC__stream_decoder_delete() or initialized
  * again to decode another stream.
  *
- * Note that the stream decoder has no real concept of stream position, it
- * just converts data.  To seek within a stream the callbacks have only to
- * flush the decoder using FLAC__stream_decoder_flush() and start feeding
- * data from the new position through the read callback.  The seekable
- * stream decoder does just this.
+ * Seeking is exposed through the FLAC__stream_decoder_seek_absolute() method.
+ * At any point after the stream decoder has been initialized, the client can
+ * call this function to seek to an exact sample within the stream.
+ * Subsequently, the first time the write callback is called it will be
+ * passed a (possibly partial) block starting at that sample.
+ *
+ * If the client cannot seek via the callback interface provided, but still
+ * has another way of seeking, it can flush the decoder using
+ * FLAC__stream_decoder_flush() and start feeding data from the new position
+ * through the read callback.
+ *
+ * The stream decoder also provides MD5 signature checking.  If this is
+ * turned on before initialization, FLAC__stream_decoder_finish() will
+ * report when the decoded MD5 signature does not match the one stored
+ * in the STREAMINFO block.  MD5 checking is automatically turned off
+ * (until the next FLAC__stream_decoder_reset()) if there is no signature
+ * in the STREAMINFO block or when a seek is attempted.
  *
  * The FLAC__stream_decoder_set_metadata_*() functions deserve special
  * attention.  By default, the decoder only calls the metadata_callback for
@@ -163,13 +168,13 @@ extern "C" {
  * explicitly which blocks to parse and return via the metadata_callback
  * and/or which to skip.  Use a FLAC__stream_decoder_set_metadata_respond_all(),
  * FLAC__stream_decoder_set_metadata_ignore() ... or FLAC__stream_decoder_set_metadata_ignore_all(),
- * FLAC__stream_decoder_set_metadata_respond() ... sequence to exactly specify which
- * blocks to return.  Remember that some metadata blocks can be big so
- * filtering out the ones you don't use can reduce the memory requirements
- * of the decoder.  Also note the special forms
- * FLAC__stream_decoder_set_metadata_respond_application(id) and
- * FLAC__stream_decoder_set_metadata_ignore_application(id) for filtering APPLICATION
- * blocks based on the application ID.
+ * FLAC__stream_decoder_set_metadata_respond() ... sequence to exactly specify
+ * which blocks to return.  Remember that metadata blocks can potentially
+ * be big (for example, cover art) so filtering out the ones you don't
+ * use can reduce the memory requirements of the decoder.  Also note the
+ * special forms FLAC__stream_decoder_set_metadata_respond_application(id)
+ * and FLAC__stream_decoder_set_metadata_ignore_application(id) for
+ * filtering APPLICATION blocks based on the application ID.
  *
  * STREAMINFO and SEEKTABLE blocks are always parsed and used internally, but
  * they still can legally be filtered from the metadata_callback.
@@ -178,7 +183,7 @@ extern "C" {
  * The "set" functions may only be called when the decoder is in the
  * state FLAC__STREAM_DECODER_UNINITIALIZED, i.e. after
  * FLAC__stream_decoder_new() or FLAC__stream_decoder_finish(), but
- * before FLAC__stream_decoder_init().  If this is the case they will
+ * before FLAC__stream_decoder_init_*().  If this is the case they will
  * return \c true, otherwise \c false.
  *
  * \note
@@ -191,7 +196,7 @@ extern "C" {
 
 /** State values for a FLAC__StreamDecoder
  *
- *  The decoder's state can be obtained by calling FLAC__stream_decoder_get_state().
+ * The decoder's state can be obtained by calling FLAC__stream_decoder_get_state().
  */
 typedef enum {
 
@@ -202,7 +207,9 @@ typedef enum {
 	/**< The decoder is ready to or is in the process of reading metadata. */
 
 	FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC,
-	/**< The decoder is ready to or is in the process of searching for the frame sync code. */
+	/**< The decoder is ready to or is in the process of searching for the
+	 * frame sync code.
+	 */
 
 	FLAC__STREAM_DECODER_READ_FRAME,
 	/**< The decoder is ready to or is in the process of reading a frame. */
@@ -210,26 +217,28 @@ typedef enum {
 	FLAC__STREAM_DECODER_END_OF_STREAM,
 	/**< The decoder has reached the end of the stream. */
 
+	FLAC__STREAM_DECODER_OGG_ERROR,
+	/**< An error occurred in the underlying Ogg layer.  */
+
+	FLAC__STREAM_DECODER_SEEK_ERROR,
+	/**< An error occurred while seeking.  The decoder must be flushed
+	 * with FLAC__stream_decoder_flush() or reset with
+	 * FLAC__stream_decoder_reset() before decoding can continue.
+	 */
+
 	FLAC__STREAM_DECODER_ABORTED,
 	/**< The decoder was aborted by the read callback. */
 
-	FLAC__STREAM_DECODER_UNPARSEABLE_STREAM,
-	/**< The decoder encountered reserved fields in use in the stream. */
-
 	FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR,
-	/**< An error occurred allocating memory. */
-
-	FLAC__STREAM_DECODER_ALREADY_INITIALIZED,
-	/**< FLAC__stream_decoder_init() was called when the decoder was
-	 * already initialized, usually because
-	 * FLAC__stream_decoder_finish() was not called.
+	/**< An error occurred allocating memory.  The decoder is in an invalid
+	 * state and can no longer be used.
 	 */
 
-	FLAC__STREAM_DECODER_INVALID_CALLBACK,
-	/**< FLAC__stream_decoder_init() was called without all callbacks being set. */
-
 	FLAC__STREAM_DECODER_UNINITIALIZED
-	/**< The decoder is in the uninitialized state. */
+	/**< The decoder is in the uninitialized state; one of the
+	 * FLAC__stream_decoder_init_*() functions must be called before samples
+	 * can be processed.
+	 */
 
 } FLAC__StreamDecoderState;
 
@@ -241,6 +250,44 @@ typedef enum {
 extern FLAC_API const char * const FLAC__StreamDecoderStateString[];
 
 
+/** Possible return values for the FLAC__stream_decoder_init_*() functions.
+ */
+typedef enum {
+
+	FLAC__STREAM_DECODER_INIT_STATUS_OK = 0,
+	/**< Initialization was successful. */
+
+	FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER,
+	/**< The library was not compiled with support for the given container
+	 * format.
+	 */
+
+	FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS,
+	/**< A required callback was not supplied. */
+
+	FLAC__STREAM_DECODER_INIT_STATUS_MEMORY_ALLOCATION_ERROR,
+	/**< An error occurred allocating memory. */
+
+	FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE,
+	/**< fopen() failed in FLAC__stream_decoder_init_file() or
+	 * FLAC__stream_decoder_init_ogg_file(). */
+
+	FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED
+	/**< FLAC__stream_decoder_init_*() was called when the decoder was
+	 * already initialized, usually because
+	 * FLAC__stream_decoder_finish() was not called.
+	 */
+
+} FLAC__StreamDecoderInitStatus;
+
+/** Maps a FLAC__StreamDecoderInitStatus to a C string.
+ *
+ *  Using a FLAC__StreamDecoderInitStatus as the index to this array
+ *  will give the string equivalent.  The contents should not be modified.
+ */
+extern FLAC_API const char * const FLAC__StreamDecoderInitStatusString[];
+
+
 /** Return values for the FLAC__StreamDecoder read callback.
  */
 typedef enum {
@@ -249,7 +296,15 @@ typedef enum {
 	/**< The read was OK and decoding can continue. */
 
 	FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM,
-	/**< The read was attempted at the end of the stream. */
+	/**< The read was attempted while at the end of the stream.  Note that
+	 * the client must only return this value when the read callback was
+	 * called when already at the end of the stream.  Otherwise, if the read
+	 * itself moves to the end of the stream, the client should still return
+	 * the data and \c FLAC__STREAM_DECODER_READ_STATUS_CONTINUE, and then on
+	 * the next read callback it should return
+	 * \c FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM with a byte count
+	 * of \c 0.
+	 */
 
 	FLAC__STREAM_DECODER_READ_STATUS_ABORT
 	/**< An unrecoverable error occurred.  The decoder will return from the process call. */
@@ -264,6 +319,75 @@ typedef enum {
 extern FLAC_API const char * const FLAC__StreamDecoderReadStatusString[];
 
 
+/** Return values for the FLAC__StreamDecoder seek callback.
+ */
+typedef enum {
+
+	FLAC__STREAM_DECODER_SEEK_STATUS_OK,
+	/**< The seek was OK and decoding can continue. */
+
+	FLAC__STREAM_DECODER_SEEK_STATUS_ERROR,
+	/**< An unrecoverable error occurred.  The decoder will return from the process call. */
+
+	FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED
+	/**< Client does not support seeking. */
+
+} FLAC__StreamDecoderSeekStatus;
+
+/** Maps a FLAC__StreamDecoderSeekStatus to a C string.
+ *
+ *  Using a FLAC__StreamDecoderSeekStatus as the index to this array
+ *  will give the string equivalent.  The contents should not be modified.
+ */
+extern FLAC_API const char * const FLAC__StreamDecoderSeekStatusString[];
+
+
+/** Return values for the FLAC__StreamDecoder tell callback.
+ */
+typedef enum {
+
+	FLAC__STREAM_DECODER_TELL_STATUS_OK,
+	/**< The tell was OK and decoding can continue. */
+
+	FLAC__STREAM_DECODER_TELL_STATUS_ERROR,
+	/**< An unrecoverable error occurred.  The decoder will return from the process call. */
+
+	FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED
+	/**< Client does not support telling the position. */
+
+} FLAC__StreamDecoderTellStatus;
+
+/** Maps a FLAC__StreamDecoderTellStatus to a C string.
+ *
+ *  Using a FLAC__StreamDecoderTellStatus as the index to this array
+ *  will give the string equivalent.  The contents should not be modified.
+ */
+extern FLAC_API const char * const FLAC__StreamDecoderTellStatusString[];
+
+
+/** Return values for the FLAC__StreamDecoder length callback.
+ */
+typedef enum {
+
+	FLAC__STREAM_DECODER_LENGTH_STATUS_OK,
+	/**< The length call was OK and decoding can continue. */
+
+	FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR,
+	/**< An unrecoverable error occurred.  The decoder will return from the process call. */
+
+	FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED
+	/**< Client does not support reporting the length. */
+
+} FLAC__StreamDecoderLengthStatus;
+
+/** Maps a FLAC__StreamDecoderLengthStatus to a C string.
+ *
+ *  Using a FLAC__StreamDecoderLengthStatus as the index to this array
+ *  will give the string equivalent.  The contents should not be modified.
+ */
+extern FLAC_API const char * const FLAC__StreamDecoderLengthStatusString[];
+
+
 /** Return values for the FLAC__StreamDecoder write callback.
  */
 typedef enum {
@@ -284,7 +408,20 @@ typedef enum {
 extern FLAC_API const char * const FLAC__StreamDecoderWriteStatusString[];
 
 
-/** Possible values passed in to the FLAC__StreamDecoder error callback.
+/** Possible values passed back to the FLAC__StreamDecoder error callback.
+ *  \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC is the generic catch-
+ *  all.  The rest could be caused by bad sync (false synchronization on
+ *  data that is not the start of a frame) or corrupted data.  The error
+ *  itself is the decoder's best guess at what happened assuming a correct
+ *  sync.  For example \c FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER
+ *  could be caused by a correct sync on the start of a frame, but some
+ *  data in the frame header was corrupted.  Or it could be the result of
+ *  syncing on a point in the stream that looked like the start of a frame
+ *  but was not.  \c FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM
+ *  could be because the decoder encountered a valid frame made by a future
+ *  version of the encoder which it cannot parse, or because of a false
+ *  sync making it appear as though an encountered frame was generated by
+ *  a future encoder.
  */
 typedef enum {
 
@@ -294,9 +431,12 @@ typedef enum {
 	FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER,
 	/**< The decoder encountered a corrupted frame header. */
 
-	FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH
+	FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH,
 	/**< The frame's data did not match the CRC in the footer. */
 
+	FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM
+	/**< The decoder encountered reserved fields in use in the stream. */
+
 } FLAC__StreamDecoderErrorStatus;
 
 /** Maps a FLAC__StreamDecoderErrorStatus to a C string.
@@ -325,7 +465,37 @@ typedef struct {
 } FLAC__StreamDecoder;
 
 /** Signature for the read callback.
- *  See FLAC__stream_decoder_set_read_callback() for more info.
+ *
+ *  A function pointer matching this signature must be passed to
+ *  FLAC__stream_decoder_init*_stream(). The supplied function will be
+ *  called when the decoder needs more input data.  The address of the
+ *  buffer to be filled is supplied, along with the number of bytes the
+ *  buffer can hold.  The callback may choose to supply less data and
+ *  modify the byte count but must be careful not to overflow the buffer.
+ *  The callback then returns a status code chosen from
+ *  FLAC__StreamDecoderReadStatus.
+ *
+ * Here is an example of a read callback for stdio streams:
+ * \code
+ * FLAC__StreamDecoderReadStatus read_cb(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data)
+ * {
+ *   FILE *file = ((MyClientData*)client_data)->file;
+ *   if(*bytes > 0) {
+ *     *bytes = fread(buffer, sizeof(FLAC__byte), *bytes, file);
+ *     if(ferror(file))
+ *       return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
+ *     else if(*bytes == 0)
+ *       return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
+ *     else
+ *       return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+ *   }
+ *   else
+ *     return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
+ * }
+ * \endcode
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
  *
  * \param  decoder  The decoder instance calling the callback.
  * \param  buffer   A pointer to a location for the callee to store
@@ -337,14 +507,163 @@ typedef struct {
  *                  stored (0 in case of error or end-of-stream) before
  *                  returning.
  * \param  client_data  The callee's client data set through
- *                      FLAC__stream_decoder_set_client_data().
+ *                      FLAC__stream_decoder_init_*().
  * \retval FLAC__StreamDecoderReadStatus
+ *    The callee's return status.  Note that the callback should return
+ *    \c FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM if and only if
+ *    zero bytes were read and there is no more data to be read.
+ */
+typedef FLAC__StreamDecoderReadStatus (*FLAC__StreamDecoderReadCallback)(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
+
+/** Signature for the seek callback.
+ *
+ *  A function pointer matching this signature may be passed to
+ *  FLAC__stream_decoder_init*_stream().  The supplied function will be
+ *  called when the decoder needs to seek the input stream.  The decoder
+ *  will pass the absolute byte offset to seek to, 0 meaning the
+ *  beginning of the stream.
+ *
+ * Here is an example of a seek callback for stdio streams:
+ * \code
+ * FLAC__StreamDecoderSeekStatus seek_cb(const FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data)
+ * {
+ *   FILE *file = ((MyClientData*)client_data)->file;
+ *   if(file == stdin)
+ *     return FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED;
+ *   else if(fseeko(file, (off_t)absolute_byte_offset, SEEK_SET) < 0)
+ *     return FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
+ *   else
+ *     return FLAC__STREAM_DECODER_SEEK_STATUS_OK;
+ * }
+ * \endcode
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
+ *
+ * \param  decoder  The decoder instance calling the callback.
+ * \param  absolute_byte_offset  The offset from the beginning of the stream
+ *                               to seek to.
+ * \param  client_data  The callee's client data set through
+ *                      FLAC__stream_decoder_init_*().
+ * \retval FLAC__StreamDecoderSeekStatus
  *    The callee's return status.
  */
-typedef FLAC__StreamDecoderReadStatus (*FLAC__StreamDecoderReadCallback)(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], unsigned *bytes, void *client_data);
+typedef FLAC__StreamDecoderSeekStatus (*FLAC__StreamDecoderSeekCallback)(const FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data);
+
+/** Signature for the tell callback.
+ *
+ *  A function pointer matching this signature may be passed to
+ *  FLAC__stream_decoder_init*_stream().  The supplied function will be
+ *  called when the decoder wants to know the current position of the
+ *  stream.  The callback should return the byte offset from the
+ *  beginning of the stream.
+ *
+ * Here is an example of a tell callback for stdio streams:
+ * \code
+ * FLAC__StreamDecoderTellStatus tell_cb(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data)
+ * {
+ *   FILE *file = ((MyClientData*)client_data)->file;
+ *   off_t pos;
+ *   if(file == stdin)
+ *     return FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED;
+ *   else if((pos = ftello(file)) < 0)
+ *     return FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
+ *   else {
+ *     *absolute_byte_offset = (FLAC__uint64)pos;
+ *     return FLAC__STREAM_DECODER_TELL_STATUS_OK;
+ *   }
+ * }
+ * \endcode
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
+ *
+ * \param  decoder  The decoder instance calling the callback.
+ * \param  absolute_byte_offset  A pointer to storage for the current offset
+ *                               from the beginning of the stream.
+ * \param  client_data  The callee's client data set through
+ *                      FLAC__stream_decoder_init_*().
+ * \retval FLAC__StreamDecoderTellStatus
+ *    The callee's return status.
+ */
+typedef FLAC__StreamDecoderTellStatus (*FLAC__StreamDecoderTellCallback)(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data);
+
+/** Signature for the length callback.
+ *
+ *  A function pointer matching this signature may be passed to
+ *  FLAC__stream_decoder_init*_stream().  The supplied function will be
+ *  called when the decoder wants to know the total length of the stream
+ *  in bytes.
+ *
+ * Here is an example of a length callback for stdio streams:
+ * \code
+ * FLAC__StreamDecoderLengthStatus length_cb(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data)
+ * {
+ *   FILE *file = ((MyClientData*)client_data)->file;
+ *   struct stat filestats;
+ *
+ *   if(file == stdin)
+ *     return FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED;
+ *   else if(fstat(fileno(file), &filestats) != 0)
+ *     return FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
+ *   else {
+ *     *stream_length = (FLAC__uint64)filestats.st_size;
+ *     return FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
+ *   }
+ * }
+ * \endcode
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
+ *
+ * \param  decoder  The decoder instance calling the callback.
+ * \param  stream_length  A pointer to storage for the length of the stream
+ *                        in bytes.
+ * \param  client_data  The callee's client data set through
+ *                      FLAC__stream_decoder_init_*().
+ * \retval FLAC__StreamDecoderLengthStatus
+ *    The callee's return status.
+ */
+typedef FLAC__StreamDecoderLengthStatus (*FLAC__StreamDecoderLengthCallback)(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data);
+
+/** Signature for the EOF callback.
+ *
+ *  A function pointer matching this signature may be passed to
+ *  FLAC__stream_decoder_init*_stream().  The supplied function will be
+ *  called when the decoder needs to know if the end of the stream has
+ *  been reached.
+ *
+ * Here is an example of an EOF callback for stdio streams:
+ * \code
+ * FLAC__bool eof_cb(const FLAC__StreamDecoder *decoder, void *client_data)
+ * {
+ *   FILE *file = ((MyClientData*)client_data)->file;
+ *   return feof(file)? true : false;
+ * }
+ * \endcode
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
+ *
+ * \param  decoder  The decoder instance calling the callback.
+ * \param  client_data  The callee's client data set through
+ *                      FLAC__stream_decoder_init_*().
+ * \retval FLAC__bool
+ *    \c true if currently at the end of the stream, else \c false.
+ */
+typedef FLAC__bool (*FLAC__StreamDecoderEofCallback)(const FLAC__StreamDecoder *decoder, void *client_data);
 
 /** Signature for the write callback.
- *  See FLAC__stream_decoder_set_write_callback() for more info.
+ *
+ *  A function pointer matching this signature must be passed to one of
+ *  the FLAC__stream_decoder_init_*() functions.
+ *  The supplied function will be called when the decoder has decoded a
+ *  single audio frame.  The decoder will pass the frame metadata as well
+ *  as an array of pointers (one for each channel) pointing to the
+ *  decoded audio.
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
  *
  * \param  decoder  The decoder instance calling the callback.
  * \param  frame    The description of the decoded frame.  See
@@ -352,33 +671,57 @@ typedef FLAC__StreamDecoderReadStatus (*FLAC__StreamDecoderReadCallback)(const F
  * \param  buffer   An array of pointers to decoded channels of data.
  *                  Each pointer will point to an array of signed
  *                  samples of length \a frame->header.blocksize.
- *                  Currently, the channel order has no meaning
- *                  except for stereo streams; in this case channel
- *                  0 is left and 1 is right.
+ *                  Channels will be ordered according to the FLAC
+ *                  specification; see the documentation for the
+ *                  <A HREF="../format.html#frame_header">frame header</A>.
  * \param  client_data  The callee's client data set through
- *                      FLAC__stream_decoder_set_client_data().
+ *                      FLAC__stream_decoder_init_*().
  * \retval FLAC__StreamDecoderWriteStatus
  *    The callee's return status.
  */
 typedef FLAC__StreamDecoderWriteStatus (*FLAC__StreamDecoderWriteCallback)(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data);
 
 /** Signature for the metadata callback.
- *  See FLAC__stream_decoder_set_metadata_callback() for more info.
+ *
+ *  A function pointer matching this signature must be passed to one of
+ *  the FLAC__stream_decoder_init_*() functions.
+ *  The supplied function will be called when the decoder has decoded a
+ *  metadata block.  In a valid FLAC file there will always be one
+ *  \c STREAMINFO block, followed by zero or more other metadata blocks.
+ *  These will be supplied by the decoder in the same order as they
+ *  appear in the stream and always before the first audio frame (i.e.
+ *  write callback).  The metadata block that is passed in must not be
+ *  modified, and it doesn't live beyond the callback, so you should make
+ *  a copy of it with FLAC__metadata_object_clone() if you will need it
+ *  elsewhere.  Since metadata blocks can potentially be large, by
+ *  default the decoder only calls the metadata callback for the
+ *  \c STREAMINFO block; you can instruct the decoder to pass or filter
+ *  other blocks with FLAC__stream_decoder_set_metadata_*() calls.
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
  *
  * \param  decoder  The decoder instance calling the callback.
  * \param  metadata The decoded metadata block.
  * \param  client_data  The callee's client data set through
- *                      FLAC__stream_decoder_set_client_data().
+ *                      FLAC__stream_decoder_init_*().
  */
 typedef void (*FLAC__StreamDecoderMetadataCallback)(const FLAC__StreamDecoder *decoder, const FLAC__StreamMetadata *metadata, void *client_data);
 
 /** Signature for the error callback.
- *  See FLAC__stream_decoder_set_error_callback() for more info.
+ *
+ *  A function pointer matching this signature must be passed to one of
+ *  the FLAC__stream_decoder_init_*() functions.
+ *  The supplied function will be called whenever an error occurs during
+ *  decoding.
+ *
+ * \note In general, FLAC__StreamDecoder functions which change the
+ * state should not be called on the \a decoder while in the callback.
  *
  * \param  decoder  The decoder instance calling the callback.
  * \param  status   The error encountered by the decoder.
  * \param  client_data  The callee's client data set through
- *                      FLAC__stream_decoder_set_client_data().
+ *                      FLAC__stream_decoder_init_*().
  */
 typedef void (*FLAC__StreamDecoderErrorCallback)(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status, void *client_data);
 
@@ -396,7 +739,7 @@ typedef void (*FLAC__StreamDecoderErrorCallback)(const FLAC__StreamDecoder *deco
  * \retval FLAC__StreamDecoder*
  *    \c NULL if there was an error allocating memory, else the new instance.
  */
-FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new();
+FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new(void);
 
 /** Free a decoder instance.  Deletes the object pointed to by \a decoder.
  *
@@ -413,107 +756,45 @@ FLAC_API void FLAC__stream_decoder_delete(FLAC__StreamDecoder *decoder);
  *
  ***********************************************************************/
 
-/** Set the read callback.
- *  The supplied function will be called when the decoder needs more input
- *  data.  The address of the buffer to be filled is supplied, along with
- *  the number of bytes the buffer can hold.  The callback may choose to
- *  supply less data and modify the byte count but must be careful not to
- *  overflow the buffer.  The callback then returns a status code chosen
- *  from FLAC__StreamDecoderReadStatus.
+/** Set the serial number for the FLAC stream within the Ogg container.
+ *  The default behavior is to use the serial number of the first Ogg
+ *  page.  Setting a serial number here will explicitly specify which
+ *  stream is to be decoded.
  *
  * \note
- * The callback is mandatory and must be set before initialization.
+ * This does not need to be set for native FLAC decoding.
  *
- * \default \c NULL
- * \param  decoder  A decoder instance to set.
- * \param  value    See above.
+ * \default use serial number of first page
+ * \param  decoder        A decoder instance to set.
+ * \param  serial_number  See above.
  * \assert
  *    \code decoder != NULL \endcode
- *    \code value != NULL \endcode
  * \retval FLAC__bool
  *    \c false if the decoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_decoder_set_read_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderReadCallback value);
+FLAC_API FLAC__bool FLAC__stream_decoder_set_ogg_serial_number(FLAC__StreamDecoder *decoder, long serial_number);
 
-/** Set the write callback.
- *  The supplied function will be called when the decoder has decoded a
- *  single frame of data.  The decoder will pass the frame metadata as
- *  well as an array of pointers (one for each channel) pointing to the
- *  decoded audio.
+/** Set the "MD5 signature checking" flag.  If \c true, the decoder will
+ *  compute the MD5 signature of the unencoded audio data while decoding
+ *  and compare it to the signature from the STREAMINFO block, if it
+ *  exists, during FLAC__stream_decoder_finish().
  *
- * \note
- * The callback is mandatory and must be set before initialization.
+ *  MD5 signature checking will be turned off (until the next
+ *  FLAC__stream_decoder_reset()) if there is no signature in the
+ *  STREAMINFO block or when a seek is attempted.
  *
- * \default \c NULL
+ *  Clients that do not use the MD5 check should leave this off to speed
+ *  up decoding.
+ *
+ * \default \c false
  * \param  decoder  A decoder instance to set.
- * \param  value    See above.
- * \assert
- *    \code decoder != NULL \endcode
- *    \code value != NULL \endcode
- * \retval FLAC__bool
- *    \c false if the decoder is already initialized, else \c true.
- */
-FLAC_API FLAC__bool FLAC__stream_decoder_set_write_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderWriteCallback value);
-
-/** Set the metadata callback.
- *  The supplied function will be called when the decoder has decoded a metadata
- *  block.  In a valid FLAC file there will always be one STREAMINFO block,
- *  followed by zero or more other metadata blocks.  These will be supplied
- *  by the decoder in the same order as they appear in the stream and always
- *  before the first audio frame (i.e. write callback).  The metadata block
- *  that is passed in must not be modified, and it doesn't live beyond the
- *  callback, so you should make a copy of it with
- *  FLAC__metadata_object_clone() if you will need it elsewhere.  Since
- *  metadata blocks can potentially be large, by default the decoder only
- *  calls the metadata callback for the STREAMINFO block; you can instruct
- *  the decoder to pass or filter other blocks with
- *  FLAC__stream_decoder_set_metadata_*() calls.
- *
- * \note
- * The callback is mandatory and must be set before initialization.
- *
- * \default \c NULL
- * \param  decoder  A decoder instance to set.
- * \param  value    See above.
- * \assert
- *    \code decoder != NULL \endcode
- *    \code value != NULL \endcode
- * \retval FLAC__bool
- *    \c false if the decoder is already initialized, else \c true.
- */
-FLAC_API FLAC__bool FLAC__stream_decoder_set_metadata_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderMetadataCallback value);
-
-/** Set the error callback.
- *  The supplied function will be called whenever an error occurs during
- *  decoding.
- *
- * \note
- * The callback is mandatory and must be set before initialization.
- *
- * \default \c NULL
- * \param  decoder  A decoder instance to set.
- * \param  value    See above.
- * \assert
- *    \code decoder != NULL \endcode
- *    \code value != NULL \endcode
- * \retval FLAC__bool
- *    \c false if the decoder is already initialized, else \c true.
- */
-FLAC_API FLAC__bool FLAC__stream_decoder_set_error_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorCallback value);
-
-/** Set the client data to be passed back to callbacks.
- *  This value will be supplied to callbacks in their \a client_data
- *  argument.
- *
- * \default \c NULL
- * \param  decoder  A decoder instance to set.
- * \param  value    See above.
+ * \param  value    Flag value (see above).
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
  *    \c false if the decoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_decoder_set_client_data(FLAC__StreamDecoder *decoder, void *value);
+FLAC_API FLAC__bool FLAC__stream_decoder_set_md5_checking(FLAC__StreamDecoder *decoder, FLAC__bool value);
 
 /** Direct the decoder to pass on all metadata blocks of type \a type.
  *
@@ -617,6 +898,32 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_get_state(const FLAC__Str
  */
 FLAC_API const char *FLAC__stream_decoder_get_resolved_state_string(const FLAC__StreamDecoder *decoder);
 
+/** Get the "MD5 signature checking" flag.
+ *  This is the value of the setting, not whether or not the decoder is
+ *  currently checking the MD5 (remember, it can be turned off automatically
+ *  by a seek).  When the decoder is reset the flag will be restored to the
+ *  value returned by this function.
+ *
+ * \param  decoder  A decoder instance to query.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__bool
+ *    See above.
+ */
+FLAC_API FLAC__bool FLAC__stream_decoder_get_md5_checking(const FLAC__StreamDecoder *decoder);
+
+/** Get the total number of samples in the stream being decoded.
+ *  Will only be valid after decoding has started and will contain the
+ *  value from the \c STREAMINFO block.  A value of \c 0 means "unknown".
+ *
+ * \param  decoder  A decoder instance to query.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__uint64
+ *    See above.
+ */
+FLAC_API FLAC__uint64 FLAC__stream_decoder_get_total_samples(const FLAC__StreamDecoder *decoder);
+
 /** Get the current number of channels in the stream being decoded.
  *  Will only be valid after decoding has started and will contain the
  *  value from the most recently decoded frame header.
@@ -677,22 +984,368 @@ FLAC_API unsigned FLAC__stream_decoder_get_sample_rate(const FLAC__StreamDecoder
  */
 FLAC_API unsigned FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder *decoder);
 
-/** Initialize the decoder instance.
- *  Should be called after FLAC__stream_decoder_new() and
+/** Returns the decoder's current read position within the stream.
+ *  The position is the byte offset from the start of the stream.
+ *  Bytes before this position have been fully decoded.  Note that
+ *  there may still be undecoded bytes in the decoder's read FIFO.
+ *  The returned position is correct even after a seek.
+ *
+ *  \warning This function currently only works for native FLAC,
+ *           not Ogg FLAC streams.
+ *
+ * \param  decoder   A decoder instance to query.
+ * \param  position  Address at which to return the desired position.
+ * \assert
+ *    \code decoder != NULL \endcode
+ *    \code position != NULL \endcode
+ * \retval FLAC__bool
+ *    \c true if successful, \c false if the stream is not native FLAC,
+ *    or there was an error from the 'tell' callback or it returned
+ *    \c FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED.
+ */
+FLAC_API FLAC__bool FLAC__stream_decoder_get_decode_position(const FLAC__StreamDecoder *decoder, FLAC__uint64 *position);
+
+/** Initialize the decoder instance to decode native FLAC streams.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a
+ *  native FLAC stream. I/O is performed via callbacks to the client.
+ *  For decoding from a plain file via filename or open FILE*,
+ *  FLAC__stream_decoder_init_file() and FLAC__stream_decoder_init_FILE()
+ *  provide a simpler interface.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
  *  FLAC__stream_decoder_set_*() but before any of the
  *  FLAC__stream_decoder_process_*() functions.  Will set and return the
  *  decoder state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA
  *  if initialization succeeded.
  *
- * \param  decoder  An uninitialized decoder instance.
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  read_callback      See FLAC__StreamDecoderReadCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  seek_callback      See FLAC__StreamDecoderSeekCallback.  This
+ *                            pointer may be \c NULL if seeking is not
+ *                            supported.  If \a seek_callback is not \c NULL then a
+ *                            \a tell_callback, \a length_callback, and \a eof_callback must also be supplied.
+ *                            Alternatively, a dummy seek callback that just
+ *                            returns \c FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  tell_callback      See FLAC__StreamDecoderTellCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a tell_callback must also be supplied.
+ *                            Alternatively, a dummy tell callback that just
+ *                            returns \c FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  length_callback    See FLAC__StreamDecoderLengthCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a length_callback must also be supplied.
+ *                            Alternatively, a dummy length callback that just
+ *                            returns \c FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  eof_callback       See FLAC__StreamDecoderEofCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a eof_callback must also be supplied.
+ *                            Alternatively, a dummy eof callback that just
+ *                            returns \c false
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval FLAC__StreamDecoderState
- *    \c FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if initialization was
- *    successful; see FLAC__StreamDecoderState for the meanings of other
- *    return values.
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
  */
-FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder *decoder);
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_stream(
+	FLAC__StreamDecoder *decoder,
+	FLAC__StreamDecoderReadCallback read_callback,
+	FLAC__StreamDecoderSeekCallback seek_callback,
+	FLAC__StreamDecoderTellCallback tell_callback,
+	FLAC__StreamDecoderLengthCallback length_callback,
+	FLAC__StreamDecoderEofCallback eof_callback,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
+
+/** Initialize the decoder instance to decode Ogg FLAC streams.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a
+ *  FLAC stream in an Ogg container. I/O is performed via callbacks to the
+ *  client.  For decoding from a plain file via filename or open FILE*,
+ *  FLAC__stream_decoder_init_ogg_file() and FLAC__stream_decoder_init_ogg_FILE()
+ *  provide a simpler interface.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
+ *  FLAC__stream_decoder_set_*() but before any of the
+ *  FLAC__stream_decoder_process_*() functions.  Will set the decoder
+ *  state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if
+ *  initialization succeeded, and return a FLAC__StreamDecoderInitStatus.
+ *
+ *  \note Support for Ogg FLAC in the library is optional.  If this
+ *  library has been built without support for Ogg FLAC, this function
+ *  will return \c FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER.
+ *
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  read_callback      See FLAC__StreamDecoderReadCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  seek_callback      See FLAC__StreamDecoderSeekCallback.  This
+ *                            pointer may be \c NULL if seeking is not
+ *                            supported.  If \a seek_callback is not \c NULL then a
+ *                            \a tell_callback, \a length_callback, and \a eof_callback must also be supplied.
+ *                            Alternatively, a dummy seek callback that just
+ *                            returns \c FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  tell_callback      See FLAC__StreamDecoderTellCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a tell_callback must also be supplied.
+ *                            Alternatively, a dummy tell callback that just
+ *                            returns \c FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  length_callback    See FLAC__StreamDecoderLengthCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a length_callback must also be supplied.
+ *                            Alternatively, a dummy length callback that just
+ *                            returns \c FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  eof_callback       See FLAC__StreamDecoderEofCallback.  This
+ *                            pointer may be \c NULL if not supported by the client.  If
+ *                            \a seek_callback is not \c NULL then a
+ *                            \a eof_callback must also be supplied.
+ *                            Alternatively, a dummy eof callback that just
+ *                            returns \c false
+ *                            may also be supplied, although this is slightly
+ *                            less efficient for the decoder.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ */
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_stream(
+	FLAC__StreamDecoder *decoder,
+	FLAC__StreamDecoderReadCallback read_callback,
+	FLAC__StreamDecoderSeekCallback seek_callback,
+	FLAC__StreamDecoderTellCallback tell_callback,
+	FLAC__StreamDecoderLengthCallback length_callback,
+	FLAC__StreamDecoderEofCallback eof_callback,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
+
+/** Initialize the decoder instance to decode native FLAC files.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a
+ *  plain native FLAC file.  For non-stdio streams, you must use
+ *  FLAC__stream_decoder_init_stream() and provide callbacks for the I/O.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
+ *  FLAC__stream_decoder_set_*() but before any of the
+ *  FLAC__stream_decoder_process_*() functions.  Will set the decoder
+ *  state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if
+ *  initialization succeeded; the init status is what gets returned.
+ *
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  file               An open FLAC file.  The file should have been
+ *                            opened with mode \c "rb" and rewound.  The file
+ *                            becomes owned by the decoder and should not be
+ *                            manipulated by the client while decoding.
+ *                            Unless \a file is \c stdin, it will be closed
+ *                            when FLAC__stream_decoder_finish() is called.
+ *                            Note however that seeking will not work when
+ *                            decoding from \c stdin since it is not seekable.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
+ * \assert
+ *    \code decoder != NULL \endcode
+ *    \code file != NULL \endcode
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ */
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_FILE(
+	FLAC__StreamDecoder *decoder,
+	FILE *file,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
+
+/** Initialize the decoder instance to decode Ogg FLAC files.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a
+ *  plain Ogg FLAC file.  For non-stdio streams, you must use
+ *  FLAC__stream_decoder_init_ogg_stream() and provide callbacks for the I/O.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
+ *  FLAC__stream_decoder_set_*() but before any of the
+ *  FLAC__stream_decoder_process_*() functions.  Will set the decoder
+ *  state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if
+ *  initialization succeeded; the init status is what gets returned.
+ *
+ *  \note Support for Ogg FLAC in the library is optional.  If this
+ *  library has been built without support for Ogg FLAC, this function
+ *  will return \c FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER.
+ *
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  file               An open Ogg FLAC file.  The file should have been
+ *                            opened with mode \c "rb" and rewound.  The file
+ *                            becomes owned by the decoder and should not be
+ *                            manipulated by the client while decoding.
+ *                            Unless \a file is \c stdin, it will be closed
+ *                            when FLAC__stream_decoder_finish() is called.
+ *                            Note however that seeking will not work when
+ *                            decoding from \c stdin since it is not seekable.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
+ * \assert
+ *    \code decoder != NULL \endcode
+ *    \code file != NULL \endcode
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ */
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_FILE(
+	FLAC__StreamDecoder *decoder,
+	FILE *file,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
+
+/** Initialize the decoder instance to decode native FLAC files.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a plain
+ *  native FLAC file.  If POSIX fopen() semantics are not sufficient (for
+ *  example, with Unicode filenames on Windows), you must use
+ *  FLAC__stream_decoder_init_FILE(), or FLAC__stream_decoder_init_stream()
+ *  and provide callbacks for the I/O.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
+ *  FLAC__stream_decoder_set_*() but before any of the
+ *  FLAC__stream_decoder_process_*() functions.  Will set the decoder
+ *  state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if
+ *  initialization succeeded; the init status is what gets returned.
+ *
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  filename           The name of the file to decode from.  The file will
+ *                            be opened with fopen().  Use \c NULL to decode from
+ *                            \c stdin.  Note that \c stdin is not seekable.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ */
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_file(
+	FLAC__StreamDecoder *decoder,
+	const char *filename,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
+
+/** Initialize the decoder instance to decode Ogg FLAC files.
+ *
+ *  This flavor of initialization sets up the decoder to decode from a plain
+ *  Ogg FLAC file.  If POSIX fopen() semantics are not sufficient (for
+ *  example, with Unicode filenames on Windows), you must use
+ *  FLAC__stream_decoder_init_ogg_FILE(), or FLAC__stream_decoder_init_ogg_stream()
+ *  and provide callbacks for the I/O.
+ *
+ *  This function should be called after FLAC__stream_decoder_new() and
+ *  FLAC__stream_decoder_set_*() but before any of the
+ *  FLAC__stream_decoder_process_*() functions.  Will set the decoder
+ *  state, which will be FLAC__STREAM_DECODER_SEARCH_FOR_METADATA if
+ *  initialization succeeded; the init status is what gets returned.
+ *
+ *  \note Support for Ogg FLAC in the library is optional.  If this
+ *  library has been built without support for Ogg FLAC, this function
+ *  will return \c FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER.
+ *
+ * \param  decoder            An uninitialized decoder instance.
+ * \param  filename           The name of the file to decode from.  The file will
+ *                            be opened with fopen().  Use \c NULL to decode from
+ *                            \c stdin.  Note that \c stdin is not seekable.
+ * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
+ *                            pointer may be \c NULL if the callback is not
+ *                            desired.
+ * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
+ *                            pointer must not be \c NULL.
+ * \param  client_data        This value will be supplied to callbacks in their
+ *                            \a client_data argument.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__StreamDecoderInitStatus
+ *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
+ *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ */
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_file(
+	FLAC__StreamDecoder *decoder,
+	const char *filename,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data
+);
 
 /** Finish the decoding process.
  *  Flushes the decoding buffer, releases resources, resets the decoder
@@ -701,25 +1354,32 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
  *
  *  In the event of a prematurely-terminated decode, it is not strictly
  *  necessary to call this immediately before FLAC__stream_decoder_delete()
- *  but it is good practice to match every FLAC__stream_decoder_init()
+ *  but it is good practice to match every FLAC__stream_decoder_init_*()
  *  with a FLAC__stream_decoder_finish().
  *
  * \param  decoder  An uninitialized decoder instance.
  * \assert
  *    \code decoder != NULL \endcode
+ * \retval FLAC__bool
+ *    \c false if MD5 checking is on AND a STREAMINFO block was available
+ *    AND the MD5 signature in the STREAMINFO block was non-zero AND the
+ *    signature does not match the one computed by the decoder; else
+ *    \c true.
  */
-FLAC_API void FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder);
+FLAC_API FLAC__bool FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder);
 
 /** Flush the stream input.
  *  The decoder's input buffer will be cleared and the state set to
- *  \c FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC.
+ *  \c FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC.  This will also turn
+ *  off MD5 checking.
  *
  * \param  decoder  A decoder instance.
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
  *    \c true if successful, else \c false if a memory allocation
- *    error occurs.
+ *    error occurs (in which case the state will be set to
+ *    \c FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR).
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_flush(FLAC__StreamDecoder *decoder);
 
@@ -727,15 +1387,32 @@ FLAC_API FLAC__bool FLAC__stream_decoder_flush(FLAC__StreamDecoder *decoder);
  *  The decoder's input buffer will be cleared and the state set to
  *  \c FLAC__STREAM_DECODER_SEARCH_FOR_METADATA.  This is similar to
  *  FLAC__stream_decoder_finish() except that the settings are
- *  preserved; there is no need to call FLAC__stream_decoder_init()
- *  before decoding again.
+ *  preserved; there is no need to call FLAC__stream_decoder_init_*()
+ *  before decoding again.  MD5 checking will be restored to its original
+ *  setting.
+ *
+ *  If the decoder is seekable, or was initialized with
+ *  FLAC__stream_decoder_init*_FILE() or FLAC__stream_decoder_init*_file(),
+ *  the decoder will also attempt to seek to the beginning of the file.
+ *  If this rewind fails, this function will return \c false.  It follows
+ *  that FLAC__stream_decoder_reset() cannot be used when decoding from
+ *  \c stdin.
+ *
+ *  If the decoder was initialized with FLAC__stream_decoder_init*_stream()
+ *  and is not seekable (i.e. no seek callback was provided or the seek
+ *  callback returns \c FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED), it
+ *  is the duty of the client to start feeding data from the beginning of
+ *  the stream on the next call to one of the
+ *  FLAC__stream_decoder_process_*() functions.
  *
  * \param  decoder  A decoder instance.
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
- *    \c true if successful, else \c false if a memory allocation
- *    error occurs.
+ *    \c true if successful, else \c false if a memory allocation error
+ *    occurs (in which case the state will be set to
+ *    \c FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR) or a seek error
+ *    occurs (the state will be unchanged).
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder);
 
@@ -747,16 +1424,14 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder);
  *
  *  As the decoder needs more input it will call the read callback.
  *  Depending on what was decoded, the metadata or write callback will be
- *  called with the decoded metadata block or audio frame, unless an error
- *  occurred.  If the decoder loses sync it will call the error callback
- *  instead.
+ *  called with the decoded metadata block or audio frame.
  *
  *  Unless there is a fatal read error or end of stream, this function
  *  will return once one whole frame is decoded.  In other words, if the
  *  stream is not synchronized or points to a corrupt frame header, the
  *  decoder will continue to try and resync until it gets to a valid
  *  frame, then decode one frame, then return.  If the decoder points to
- *  frame whose frame CRC in the frame footer does not match the
+ *  a frame whose frame CRC in the frame footer does not match the
  *  computed frame CRC, this function will issue a
  *  FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH error to the
  *  error callback, and return, having decoded one complete, although
@@ -767,11 +1442,10 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder);
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
- *    \c false if any read or write error occurred (except
- *    \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC), else \c true;
- *    in any case, check the decoder state with
- *    FLAC__stream_decoder_get_state() to see what went wrong or to
- *    check for lost synchronization (a sign of stream corruption).
+ *    \c false if any fatal read, write, or memory allocation error
+ *    occurred (meaning decoding must stop), else \c true; for more
+ *    information about the decoder, check the decoder state with
+ *    FLAC__stream_decoder_get_state().
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_process_single(FLAC__StreamDecoder *decoder);
 
@@ -783,18 +1457,16 @@ FLAC_API FLAC__bool FLAC__stream_decoder_process_single(FLAC__StreamDecoder *dec
  *
  *  As the decoder needs more input it will call the read callback.
  *  As each metadata block is decoded, the metadata callback will be called
- *  with the decoded metadata.  If the decoder loses sync it will call the
- *  error callback.
+ *  with the decoded metadata.
  *
  * \param  decoder  An initialized decoder instance.
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
- *    \c false if any read or write error occurred (except
- *    \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC), else \c true;
- *    in any case, check the decoder state with
- *    FLAC__stream_decoder_get_state() to see what went wrong or to
- *    check for lost synchronization (a sign of stream corruption).
+ *    \c false if any fatal read, write, or memory allocation error
+ *    occurred (meaning decoding must stop), else \c true; for more
+ *    information about the decoder, check the decoder state with
+ *    FLAC__stream_decoder_get_state().
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_process_until_end_of_metadata(FLAC__StreamDecoder *decoder);
 
@@ -806,18 +1478,16 @@ FLAC_API FLAC__bool FLAC__stream_decoder_process_until_end_of_metadata(FLAC__Str
  *
  *  As the decoder needs more input it will call the read callback.
  *  As each metadata block and frame is decoded, the metadata or write
- *  callback will be called with the decoded metadata or frame.  If the
- *  decoder loses sync it will call the error callback.
+ *  callback will be called with the decoded metadata or frame.
  *
  * \param  decoder  An initialized decoder instance.
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
- *    \c false if any read or write error occurred (except
- *    \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC), else \c true;
- *    in any case, check the decoder state with
- *    FLAC__stream_decoder_get_state() to see what went wrong or to
- *    check for lost synchronization (a sign of stream corruption).
+ *    \c false if any fatal read, write, or memory allocation error
+ *    occurred (meaning decoding must stop), else \c true; for more
+ *    information about the decoder, check the decoder state with
+ *    FLAC__stream_decoder_get_state().
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_process_until_end_of_stream(FLAC__StreamDecoder *decoder);
 
@@ -836,15 +1506,14 @@ FLAC_API FLAC__bool FLAC__stream_decoder_process_until_end_of_stream(FLAC__Strea
  *  same way that FLAC__stream_decoder_process_single() will return once
  *  one whole frame is decoded.
  *
- *  This function, when used from the higher FLAC__SeekableStreamDecoder
- *  layer, can be used in more quickly determining FLAC frame boundaries
- *  when decoding of the actual data is not needed, for example when an
- *  application is separating a FLAC stream into frames for editing or
- *  storing in a container.  To do this, the application can use
- *  FLAC__seekable_stream_decoder_skip_single_frame() to quickly advance
+ *  This function can be used in more quickly determining FLAC frame
+ *  boundaries when decoding of the actual data is not needed, for
+ *  example when an application is separating a FLAC stream into frames
+ *  for editing or storing in a container.  To do this, the application
+ *  can use FLAC__stream_decoder_skip_single_frame() to quickly advance
  *  to the next frame, then use
- *  FLAC__seekable_stream_decoder_get_decode_position() to find the new
- *  frame boundary.
+ *  FLAC__stream_decoder_get_decode_position() to find the new frame
+ *  boundary.
  *
  *  This function should only be called when the stream has advanced
  *  past all the metadata, otherwise it will return \c false.
@@ -854,16 +1523,33 @@ FLAC_API FLAC__bool FLAC__stream_decoder_process_until_end_of_stream(FLAC__Strea
  * \assert
  *    \code decoder != NULL \endcode
  * \retval FLAC__bool
- *    \c false if any read or write error occurred (except
- *    \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC), or if the decoder
+ *    \c false if any fatal read, write, or memory allocation error
+ *    occurred (meaning decoding must stop), or if the decoder
  *    is in the FLAC__STREAM_DECODER_SEARCH_FOR_METADATA or
- *    FLAC__STREAM_DECODER_READ_METADATA state, else \c true;
- *    in any case, check the decoder state with
- *    FLAC__stream_decoder_get_state() to see what went wrong or to
- *    check for lost synchronization (a sign of stream corruption).
+ *    FLAC__STREAM_DECODER_READ_METADATA state, else \c true; for more
+ *    information about the decoder, check the decoder state with
+ *    FLAC__stream_decoder_get_state().
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_skip_single_frame(FLAC__StreamDecoder *decoder);
 
+/** Flush the input and seek to an absolute sample.
+ *  Decoding will resume at the given sample.  Note that because of
+ *  this, the next write callback may contain a partial block.  The
+ *  client must support seeking the input or this function will fail
+ *  and return \c false.  Furthermore, if the decoder state is
+ *  \c FLAC__STREAM_DECODER_SEEK_ERROR, then the decoder must be flushed
+ *  with FLAC__stream_decoder_flush() or reset with
+ *  FLAC__stream_decoder_reset() before decoding can continue.
+ *
+ * \param  decoder  A decoder instance.
+ * \param  sample   The target sample number to seek to.
+ * \assert
+ *    \code decoder != NULL \endcode
+ * \retval FLAC__bool
+ *    \c true if successful, else \c false (e.g. the input is not seekable).
+ */
+FLAC_API FLAC__bool FLAC__stream_decoder_seek_absolute(FLAC__StreamDecoder *decoder, FLAC__uint64 sample);
+
 /* \} */
 
 #ifdef __cplusplus
diff --git a/FLAC/Makefile.mgw b/FLAC/Makefile.mgw
index fdf9aa0321..f910c5804f 100644
--- a/FLAC/Makefile.mgw
+++ b/FLAC/Makefile.mgw
@@ -7,14 +7,14 @@
 # Copyright (C) 1995-2003 Jean-loup Gailly.
 # For conditions of distribution and use, see copyright notice in zlib.h
 
-ifeq (Windows_NT,$(OS))
-  WIN=1
-  WINCMD=1
-endif
-ifeq (msys,$(OSTYPE))
-  WIN=1
-  WINCMD=0
-endif
+ifeq (Windows_NT,$(OS))
+  WIN=1
+  WINCMD=1
+endif
+ifeq (msys,$(OSTYPE))
+  WIN=1
+  WINCMD=0
+endif
 
 STATICLIB = libflac.a
 
@@ -26,7 +26,7 @@ DEFINES = -D__MINW32__ -DWIN32 -DNDEBUG -D_LIB -DFLAC__CPU_IA32 -DFLAC_HAS_NASM
 CCDV = @../ccdv
 CC = gcc
 CXX = g++
-CFLAGS = $(LOC) $(DEFINES) -O2 -Wall -fomit-frame-pointer
+CFLAGS = $(LOC) $(DEFINES) -O2 -Wall -Wno-unused-function -fomit-frame-pointer
 CXXFLAGS = $(LOC) $(DEFINES) -O2 -Wall
 
 NASM = nasmw
@@ -36,7 +36,7 @@ AR = ar
 ARFLAGS = rcs
 
 OBJS = cpu_asm.o fixed_asm.o lpc_asm.o \
-       bitbuffer.o bitmath.o cpu.o crc.o fixed.o format.o lpc.o memory.o stream_decoder.o stream_decoder_pp.o
+       bitmath.o bitreader.o cpu.o crc.o fixed.o format.o lpc.o md5.o memory.o stream_decoder.o stream_decoder_pp.o
 
 all: $(STATICLIB)
 
diff --git a/FLAC/ReadMe.txt b/FLAC/ReadMe.txt
index 6bc5f3e613..db5d2e9b08 100644
--- a/FLAC/ReadMe.txt
+++ b/FLAC/ReadMe.txt
@@ -1,6 +1,7 @@
 This is not the complete FLAC distribution. It contains only what
 ZDoom needs to decode compressed FLAC streams. For the complete
-distribution, please visit <http://flac.sourceforge.net/>.
+distribution, please visit <http://flac.sourceforge.net/>. The
+version here is currently version 1.2.1.
 
 Of course, under Linux, you are encouraged to ignore this source
 here and dynamically link to the full libraries.
\ No newline at end of file
diff --git a/FLAC/bitbuffer.c b/FLAC/bitbuffer.c
deleted file mode 100644
index 7eaab0ab6b..0000000000
--- a/FLAC/bitbuffer.c
+++ /dev/null
@@ -1,2539 +0,0 @@
-/* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Xiph.org Foundation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdlib.h> /* for malloc() */
-#include <string.h> /* for memcpy(), memset() */
-#include "private/bitbuffer.h"
-#include "private/bitmath.h"
-#include "private/crc.h"
-#include "FLAC/assert.h"
-
-/*
- * Along the way you will see two versions of some functions, selected
- * by a FLAC__NO_MANUAL_INLINING macro.  One is the simplified, more
- * readable, and slow version, and the other is the same function
- * where crucial parts have been manually inlined and are much faster.
- *
- */
-
-/*
- * Some optimization strategies are slower with older versions of MSVC
- */
-#if defined _MSC_VER && _MSC_VER <= 1200
-#define FLAC__OLD_MSVC_FLAVOR
-#endif
-
-/*
- * This should be at least twice as large as the largest number of blurbs
- * required to represent any 'number' (in any encoding) you are going to
- * read.  With FLAC this is on the order of maybe a few hundred bits.
- * If the buffer is smaller than that, the decoder won't be able to read
- * in a whole number that is in a variable length encoding (e.g. Rice).
- *
- * The number we are actually using here is based on what would be the
- * approximate maximum size of a verbatim frame at the default block size,
- * for CD audio (4096 sample * 4 bytes per sample), plus some wiggle room.
- * 32kbytes sounds reasonable.  For kicks we subtract out 64 bytes for any
- * alignment or malloc overhead.
- *
- * Increase this number to decrease the number of read callbacks, at the
- * expense of using more memory.  Or decrease for the reverse effect,
- * keeping in mind the limit from the first paragraph.
- */
-static const unsigned FLAC__BITBUFFER_DEFAULT_CAPACITY = ((65536 - 64) * 8) / FLAC__BITS_PER_BLURB; /* blurbs */
-
-#ifndef FLAC__OLD_MSVC_FLAVOR
-static const unsigned char byte_to_unary_table[] = {
-	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-#endif
-
-#if FLAC__BITS_PER_BLURB == 8
-#define FLAC__BITS_PER_BLURB_LOG2 3
-#define FLAC__BYTES_PER_BLURB 1
-#define FLAC__BLURB_ALL_ONES ((FLAC__byte)0xff)
-#define FLAC__BLURB_TOP_BIT_ONE ((FLAC__byte)0x80)
-#define BLURB_BIT_TO_MASK(b) (((FLAC__blurb)'\x80') >> (b))
-#define CRC16_UPDATE_BLURB(bb, blurb, crc) FLAC__CRC16_UPDATE((blurb), (crc));
-#ifndef FLAC__OLD_MSVC_FLAVOR
-#define FLAC__ALIGNED_BLURB_UNARY(blurb) (byte_to_unary_table[blurb])
-#endif
-#elif FLAC__BITS_PER_BLURB == 32
-#define FLAC__BITS_PER_BLURB_LOG2 5
-#define FLAC__BYTES_PER_BLURB 4
-#define FLAC__BLURB_ALL_ONES ((FLAC__uint32)0xffffffff)
-#define FLAC__BLURB_TOP_BIT_ONE ((FLAC__uint32)0x80000000)
-#define BLURB_BIT_TO_MASK(b) (((FLAC__blurb)0x80000000) >> (b))
-#define CRC16_UPDATE_BLURB(bb, blurb, crc) crc16_update_blurb((bb), (blurb));
-#ifndef FLAC__OLD_MSVC_FLAVOR
-#define FLAC__ALIGNED_BLURB_UNARY(blurb) ((blurb) <= 0xff ? byte_to_unary_table[blurb] + 24 : ((blurb) <= 0xffff ? byte_to_unary_table[(blurb) >> 8] + 16 : ((blurb) <= 0xffffff ? byte_to_unary_table[(blurb) >> 16] + 8 : byte_to_unary_table[(blurb) >> 24])))
-#endif
-#else
-/* ERROR, only sizes of 8 and 32 are supported */
-#endif
-
-#define FLAC__BLURBS_TO_BITS(blurbs) ((blurbs) << FLAC__BITS_PER_BLURB_LOG2)
-
-#ifdef min
-#undef min
-#endif
-#define min(x,y) ((x)<(y)?(x):(y))
-#ifdef max
-#undef max
-#endif
-#define max(x,y) ((x)>(y)?(x):(y))
-
-/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
-#ifdef _MSC_VER
-#define FLAC__U64L(x) x
-#else
-#define FLAC__U64L(x) x##LLU
-#endif
-
-#ifndef FLaC__INLINE
-#define FLaC__INLINE
-#endif
-
-struct FLAC__BitBuffer {
-	FLAC__blurb *buffer;
-	unsigned capacity; /* in blurbs */
-	unsigned blurbs, bits;
-	unsigned total_bits; /* must always == FLAC__BITS_PER_BLURB*blurbs+bits */
-	unsigned consumed_blurbs, consumed_bits;
-	unsigned total_consumed_bits; /* must always == FLAC__BITS_PER_BLURB*consumed_blurbs+consumed_bits */
-	FLAC__uint16 read_crc16;
-#if FLAC__BITS_PER_BLURB == 32
-	unsigned crc16_align;
-#endif
-	FLAC__blurb save_head, save_tail;
-};
-
-#if FLAC__BITS_PER_BLURB == 32
-static void crc16_update_blurb(FLAC__BitBuffer *bb, FLAC__blurb blurb)
-{
-	if(bb->crc16_align == 0) {
-		FLAC__CRC16_UPDATE(blurb >> 24, bb->read_crc16);
-		FLAC__CRC16_UPDATE((blurb >> 16) & 0xff, bb->read_crc16);
-		FLAC__CRC16_UPDATE((blurb >> 8) & 0xff, bb->read_crc16);
-		FLAC__CRC16_UPDATE(blurb & 0xff, bb->read_crc16);
-	}
-	else if(bb->crc16_align == 8) {
-		FLAC__CRC16_UPDATE((blurb >> 16) & 0xff, bb->read_crc16);
-		FLAC__CRC16_UPDATE((blurb >> 8) & 0xff, bb->read_crc16);
-		FLAC__CRC16_UPDATE(blurb & 0xff, bb->read_crc16);
-	}
-	else if(bb->crc16_align == 16) {
-		FLAC__CRC16_UPDATE((blurb >> 8) & 0xff, bb->read_crc16);
-		FLAC__CRC16_UPDATE(blurb & 0xff, bb->read_crc16);
-	}
-	else if(bb->crc16_align == 24) {
-		FLAC__CRC16_UPDATE(blurb & 0xff, bb->read_crc16);
-	}
-	bb->crc16_align = 0;
-}
-#endif
-
-/*
- * WATCHOUT: The current implentation is not friendly to shrinking, i.e. it
- * does not shift left what is consumed, it just chops off the end, whether
- * there is unconsumed data there or not.  This is OK because currently we
- * never shrink the buffer, but if this ever changes, we'll have to do some
- * fixups here.
- */
-static FLAC__bool bitbuffer_resize_(FLAC__BitBuffer *bb, unsigned new_capacity)
-{
-	FLAC__blurb *new_buffer;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	if(bb->capacity == new_capacity)
-		return true;
-
-	new_buffer = (FLAC__blurb*)calloc(new_capacity, sizeof(FLAC__blurb));
-	if(new_buffer == 0)
-		return false;
-	memcpy(new_buffer, bb->buffer, sizeof(FLAC__blurb)*min(bb->blurbs+(bb->bits?1:0), new_capacity));
-	if(new_capacity < bb->blurbs+(bb->bits?1:0)) {
-		bb->blurbs = new_capacity;
-		bb->bits = 0;
-		bb->total_bits = FLAC__BLURBS_TO_BITS(new_capacity);
-	}
-	if(new_capacity < bb->consumed_blurbs+(bb->consumed_bits?1:0)) {
-		bb->consumed_blurbs = new_capacity;
-		bb->consumed_bits = 0;
-		bb->total_consumed_bits = FLAC__BLURBS_TO_BITS(new_capacity);
-	}
-	free(bb->buffer); /* we've already asserted above that (0 != bb->buffer) */
-	bb->buffer = new_buffer;
-	bb->capacity = new_capacity;
-	return true;
-}
-
-static FLAC__bool bitbuffer_grow_(FLAC__BitBuffer *bb, unsigned min_blurbs_to_add)
-{
-	unsigned new_capacity;
-
-	FLAC__ASSERT(min_blurbs_to_add > 0);
-
-	new_capacity = max(bb->capacity * 2, bb->capacity + min_blurbs_to_add);
-	return bitbuffer_resize_(bb, new_capacity);
-}
-
-static FLAC__bool bitbuffer_ensure_size_(FLAC__BitBuffer *bb, unsigned bits_to_add)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	if(FLAC__BLURBS_TO_BITS(bb->capacity) < bb->total_bits + bits_to_add)
-		return bitbuffer_grow_(bb, (bits_to_add >> FLAC__BITS_PER_BLURB_LOG2) + 2);
-	else
-		return true;
-}
-
-static FLAC__bool bitbuffer_read_from_client_(FLAC__BitBuffer *bb, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	unsigned bytes;
-	FLAC__byte *target;
-
-	/* first shift the unconsumed buffer data toward the front as much as possible */
-	if(bb->total_consumed_bits >= FLAC__BITS_PER_BLURB) {
-#if FLAC__BITS_PER_BLURB == 8
-		/*
-		 * memset and memcpy are usually implemented in assembly language
-		 * by the system libc, and they can be much faster
-		 */
-		const unsigned r_end = bb->blurbs + (bb->bits? 1:0);
-		const unsigned r = bb->consumed_blurbs, l = r_end - r;
-		memmove(&bb->buffer[0], &bb->buffer[r], l);
-		memset(&bb->buffer[l], 0, r);
-#elif FLAC__BITS_PER_BLURB == 32
-		/* still needs optimization */
-		const unsigned r_end = bb->blurbs + (bb->bits? 1:0);
-		unsigned l = 0, r = bb->consumed_blurbs;
-		for( ; r < r_end; l++, r++)
-			bb->buffer[l] = bb->buffer[r];
-		for( ; l < r_end; l++)
-			bb->buffer[l] = 0;
-#else
-		FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif /* FLAC__BITS_PER_BLURB == 32 or 8 */
-
-		bb->blurbs -= bb->consumed_blurbs;
-		bb->total_bits -= FLAC__BLURBS_TO_BITS(bb->consumed_blurbs);
-		bb->consumed_blurbs = 0;
-		bb->total_consumed_bits = bb->consumed_bits;
-	}
-
-	/* grow if we need to */
-	if(bb->capacity <= 1) {
-		if(!bitbuffer_resize_(bb, 16))
-			return false;
-	}
-
-	/* set the target for reading, taking into account blurb alignment */
-#if FLAC__BITS_PER_BLURB == 8
-	/* blurb == byte, so no gyrations necessary: */
-	target = bb->buffer + bb->blurbs;
-	bytes = bb->capacity - bb->blurbs;
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	FLAC__ASSERT((bb->bits & 7) == 0);
-	target = (FLAC__byte*)(bb->buffer + bb->blurbs) + (bb->bits >> 3);
-	bytes = ((bb->capacity - bb->blurbs) << 2) - (bb->bits >> 3); /* i.e. (bb->capacity - bb->blurbs) * FLAC__BYTES_PER_BLURB - (bb->bits / 8) */
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-
-	/* finally, read in some data */
-	if(!read_callback(target, &bytes, client_data))
-		return false;
-
-	/* now we have to handle partial blurb cases: */
-#if FLAC__BITS_PER_BLURB == 8
-	/* blurb == byte, so no gyrations necessary: */
-	bb->blurbs += bytes;
-	bb->total_bits += FLAC__BLURBS_TO_BITS(bytes);
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	{
-		const unsigned aligned_bytes = (bb->bits >> 3) + bytes;
-		bb->blurbs += (aligned_bytes >> 2); /* i.e. aligned_bytes / FLAC__BYTES_PER_BLURB */
-		bb->bits = (aligned_bytes & 3u) << 3; /* i.e. (aligned_bytes % FLAC__BYTES_PER_BLURB) * 8 */
-		bb->total_bits += (bytes << 3);
-	}
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-	return true;
-}
-
-/***********************************************************************
- *
- * Class constructor/destructor
- *
- ***********************************************************************/
-
-FLAC__BitBuffer *FLAC__bitbuffer_new()
-{
-	FLAC__BitBuffer *bb = (FLAC__BitBuffer*)calloc(1, sizeof(FLAC__BitBuffer));
-
-	/* calloc() implies:
-		memset(bb, 0, sizeof(FLAC__BitBuffer));
-		bb->buffer = 0;
-		bb->capacity = 0;
-		bb->blurbs = bb->bits = bb->total_bits = 0;
-		bb->consumed_blurbs = bb->consumed_bits = bb->total_consumed_bits = 0;
-	*/
-	return bb;
-}
-
-void FLAC__bitbuffer_delete(FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT(0 != bb);
-
-	FLAC__bitbuffer_free(bb);
-	free(bb);
-}
-
-/***********************************************************************
- *
- * Public class methods
- *
- ***********************************************************************/
-
-FLAC__bool FLAC__bitbuffer_init(FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT(0 != bb);
-
-	bb->buffer = 0;
-	bb->capacity = 0;
-	bb->blurbs = bb->bits = bb->total_bits = 0;
-	bb->consumed_blurbs = bb->consumed_bits = bb->total_consumed_bits = 0;
-
-	return FLAC__bitbuffer_clear(bb);
-}
-
-FLAC__bool FLAC__bitbuffer_init_from(FLAC__BitBuffer *bb, const FLAC__byte buffer[], unsigned bytes)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(bytes > 0);
-
-	if(!FLAC__bitbuffer_init(bb))
-		return false;
-
-	if(!bitbuffer_ensure_size_(bb, bytes << 3))
-		return false;
-
-	FLAC__ASSERT(0 != buffer);
-	/* @@@ WATCHOUT: code currently only works for 8-bits-per-blurb inclusive-or big-endian: */
-	memcpy((FLAC__byte*)bb->buffer, buffer, sizeof(FLAC__byte)*bytes);
-	bb->blurbs = bytes / FLAC__BYTES_PER_BLURB;
-	bb->bits = (bytes % FLAC__BYTES_PER_BLURB) << 3;
-	bb->total_bits = bytes << 3;
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_concatenate_aligned(FLAC__BitBuffer *dest, const FLAC__BitBuffer *src)
-{
-	unsigned bits_to_add = src->total_bits - src->total_consumed_bits;
-
-	FLAC__ASSERT(0 != dest);
-	FLAC__ASSERT(0 != src);
-
-	if(bits_to_add == 0)
-		return true;
-	if(dest->bits != src->consumed_bits)
-		return false;
-	if(!bitbuffer_ensure_size_(dest, bits_to_add))
-		return false;
-	if(dest->bits == 0) {
-		memcpy(dest->buffer+dest->blurbs, src->buffer+src->consumed_blurbs, sizeof(FLAC__blurb)*(src->blurbs-src->consumed_blurbs + ((src->bits)? 1:0)));
-	}
-	else if(dest->bits + bits_to_add > FLAC__BITS_PER_BLURB) {
-		dest->buffer[dest->blurbs] <<= (FLAC__BITS_PER_BLURB - dest->bits);
-		dest->buffer[dest->blurbs] |= (src->buffer[src->consumed_blurbs] & ((1u << (FLAC__BITS_PER_BLURB-dest->bits)) - 1));
-		memcpy(dest->buffer+dest->blurbs+1, src->buffer+src->consumed_blurbs+1, sizeof(FLAC__blurb)*(src->blurbs-src->consumed_blurbs-1 + ((src->bits)? 1:0)));
-	}
-	else {
-		dest->buffer[dest->blurbs] <<= bits_to_add;
-		dest->buffer[dest->blurbs] |= (src->buffer[src->consumed_blurbs] & ((1u << bits_to_add) - 1));
-	}
-	dest->bits = src->bits;
-	dest->total_bits += bits_to_add;
-	dest->blurbs = dest->total_bits / FLAC__BITS_PER_BLURB;
-
-	return true;
-}
-
-void FLAC__bitbuffer_free(FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT(0 != bb);
-
-	if(0 != bb->buffer)
-		free(bb->buffer);
-	bb->buffer = 0;
-	bb->capacity = 0;
-	bb->blurbs = bb->bits = bb->total_bits = 0;
-	bb->consumed_blurbs = bb->consumed_bits = bb->total_consumed_bits = 0;
-}
-
-FLAC__bool FLAC__bitbuffer_clear(FLAC__BitBuffer *bb)
-{
-	if(bb->buffer == 0) {
-		bb->capacity = FLAC__BITBUFFER_DEFAULT_CAPACITY;
-		bb->buffer = (FLAC__blurb*)calloc(bb->capacity, sizeof(FLAC__blurb));
-		if(bb->buffer == 0)
-			return false;
-	}
-	else {
-		memset(bb->buffer, 0, bb->blurbs + (bb->bits?1:0));
-	}
-	bb->blurbs = bb->bits = bb->total_bits = 0;
-	bb->consumed_blurbs = bb->consumed_bits = bb->total_consumed_bits = 0;
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_clone(FLAC__BitBuffer *dest, const FLAC__BitBuffer *src)
-{
-	FLAC__ASSERT(0 != dest);
-	FLAC__ASSERT(0 != dest->buffer);
-	FLAC__ASSERT(0 != src);
-	FLAC__ASSERT(0 != src->buffer);
-
-	if(dest->capacity < src->capacity)
-		if(!bitbuffer_resize_(dest, src->capacity))
-			return false;
-	memcpy(dest->buffer, src->buffer, sizeof(FLAC__blurb)*min(src->capacity, src->blurbs+1));
-	dest->blurbs = src->blurbs;
-	dest->bits = src->bits;
-	dest->total_bits = src->total_bits;
-	dest->consumed_blurbs = src->consumed_blurbs;
-	dest->consumed_bits = src->consumed_bits;
-	dest->total_consumed_bits = src->total_consumed_bits;
-	dest->read_crc16 = src->read_crc16;
-	return true;
-}
-
-void FLAC__bitbuffer_reset_read_crc16(FLAC__BitBuffer *bb, FLAC__uint16 seed)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT((bb->consumed_bits & 7) == 0);
-
-	bb->read_crc16 = seed;
-#if FLAC__BITS_PER_BLURB == 8
-	/* no need to do anything */
-#elif FLAC__BITS_PER_BLURB == 32
-	bb->crc16_align = bb->consumed_bits;
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-}
-
-FLAC__uint16 FLAC__bitbuffer_get_read_crc16(FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT((bb->bits & 7) == 0);
-	FLAC__ASSERT((bb->consumed_bits & 7) == 0);
-
-#if FLAC__BITS_PER_BLURB == 8
-	/* no need to do anything */
-#elif FLAC__BITS_PER_BLURB == 32
-	/*@@@ BUG: even though this probably can't happen with FLAC, need to fix the case where we are called here for the very first blurb and crc16_align is > 0 */
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) {
-		if(bb->consumed_bits == 8) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> 24, bb->read_crc16);
-		}
-		else if(bb->consumed_bits == 16) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> 24, bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> 16) & 0xff, bb->read_crc16);
-		}
-		else if(bb->consumed_bits == 24) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> 24, bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> 16) & 0xff, bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> 8) & 0xff, bb->read_crc16);
-		}
-	}
-	else {
-		if(bb->consumed_bits == 8) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> (bb->bits-8), bb->read_crc16);
-		}
-		else if(bb->consumed_bits == 16) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> (bb->bits-8), bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> (bb->bits-16)) & 0xff, bb->read_crc16);
-		}
-		else if(bb->consumed_bits == 24) {
-			const FLAC__blurb blurb = bb->buffer[bb->consumed_blurbs];
-			FLAC__CRC16_UPDATE(blurb >> (bb->bits-8), bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> (bb->bits-16)) & 0xff, bb->read_crc16);
-			FLAC__CRC16_UPDATE((blurb >> (bb->bits-24)) & 0xff, bb->read_crc16);
-		}
-	}
-	bb->crc16_align = bb->consumed_bits;
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-	return bb->read_crc16;
-}
-
-FLAC__uint16 FLAC__bitbuffer_get_write_crc16(const FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT((bb->bits & 7) == 0); /* assert that we're byte-aligned */
-
-#if FLAC__BITS_PER_BLURB == 8
-	return FLAC__crc16(bb->buffer, bb->blurbs);
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	return FLAC__crc16((FLAC__byte*)(bb->buffer), (bb->blurbs * FLAC__BYTES_PER_BLURB) + (bb->bits >> 3));
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-}
-
-FLAC__byte FLAC__bitbuffer_get_write_crc8(const FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT((bb->bits & 7) == 0); /* assert that we're byte-aligned */
-	FLAC__ASSERT(bb->buffer[0] == 0xff); /* MAGIC NUMBER for the first byte of the sync code */
-#if FLAC__BITS_PER_BLURB == 8
-	return FLAC__crc8(bb->buffer, bb->blurbs);
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	return FLAC__crc8((FLAC__byte*)(bb->buffer), (bb->blurbs * FLAC__BYTES_PER_BLURB) + (bb->bits >> 3));
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-}
-
-FLAC__bool FLAC__bitbuffer_is_byte_aligned(const FLAC__BitBuffer *bb)
-{
-	return ((bb->bits & 7) == 0);
-}
-
-FLAC__bool FLAC__bitbuffer_is_consumed_byte_aligned(const FLAC__BitBuffer *bb)
-{
-	return ((bb->consumed_bits & 7) == 0);
-}
-
-unsigned FLAC__bitbuffer_bits_left_for_byte_alignment(const FLAC__BitBuffer *bb)
-{
-	return 8 - (bb->consumed_bits & 7);
-}
-
-unsigned FLAC__bitbuffer_get_input_bytes_unconsumed(const FLAC__BitBuffer *bb)
-{
-	FLAC__ASSERT((bb->consumed_bits & 7) == 0 && (bb->bits & 7) == 0);
-	return (bb->total_bits - bb->total_consumed_bits) >> 3;
-}
-
-void FLAC__bitbuffer_get_buffer(FLAC__BitBuffer *bb, const FLAC__byte **buffer, unsigned *bytes)
-{
-	FLAC__ASSERT((bb->consumed_bits & 7) == 0 && (bb->bits & 7) == 0);
-#if FLAC__BITS_PER_BLURB == 8
-	*buffer = bb->buffer + bb->consumed_blurbs;
-	*bytes = bb->blurbs - bb->consumed_blurbs;
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	*buffer = (FLAC__byte*)(bb->buffer + bb->consumed_blurbs) + (bb->consumed_bits >> 3);
-	*bytes = (bb->total_bits - bb->total_consumed_bits) >> 3;
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-}
-
-void FLAC__bitbuffer_release_buffer(FLAC__BitBuffer *bb)
-{
-#if FLAC__BITS_PER_BLURB == 8
-	(void)bb;
-#elif FLAC__BITS_PER_BLURB == 32
-	/* @@@ WATCHOUT: code currently only works for big-endian: */
-	(void)bb;
-#else
-	FLAC__ASSERT(false); /* ERROR, only sizes of 8 and 32 are supported */
-#endif
-}
-
-FLAC__bool FLAC__bitbuffer_write_zeroes(FLAC__BitBuffer *bb, unsigned bits)
-{
-	unsigned n;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	if(bits == 0)
-		return true;
-	if(!bitbuffer_ensure_size_(bb, bits))
-		return false;
-	bb->total_bits += bits;
-	while(bits > 0) {
-		n = min(FLAC__BITS_PER_BLURB - bb->bits, bits);
-		bb->buffer[bb->blurbs] <<= n;
-		bits -= n;
-		bb->bits += n;
-		if(bb->bits == FLAC__BITS_PER_BLURB) {
-			bb->blurbs++;
-			bb->bits = 0;
-		}
-	}
-	return true;
-}
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_write_raw_uint32(FLAC__BitBuffer *bb, FLAC__uint32 val, unsigned bits)
-{
-	unsigned n, k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 32);
-	if(bits == 0)
-		return true;
-	/* inline the size check so we don't incure a function call unnecessarily */
-	if(FLAC__BLURBS_TO_BITS(bb->capacity) < bb->total_bits + bits) {
-		if(!bitbuffer_ensure_size_(bb, bits))
-			return false;
-	}
-
-	/* zero-out unused bits; WATCHOUT: other code relies on this, so this needs to stay */
-	if(bits < 32) /* @@@ gcc seems to require this because the following line causes incorrect results when bits==32; investigate */
-		val &= (~(0xffffffff << bits)); /* zero-out unused bits */
-
-	bb->total_bits += bits;
-	while(bits > 0) {
-		n = FLAC__BITS_PER_BLURB - bb->bits;
-		if(n == FLAC__BITS_PER_BLURB) { /* i.e. bb->bits == 0 */
-			if(bits < FLAC__BITS_PER_BLURB) {
-				bb->buffer[bb->blurbs] = (FLAC__blurb)val;
-				bb->bits = bits;
-				break;
-			}
-			else if(bits == FLAC__BITS_PER_BLURB) {
-				bb->buffer[bb->blurbs++] = (FLAC__blurb)val;
-				break;
-			}
-			else {
-				k = bits - FLAC__BITS_PER_BLURB;
-				bb->buffer[bb->blurbs++] = (FLAC__blurb)(val >> k);
-				/* we know k < 32 so no need to protect against the gcc bug mentioned above */
-				val &= (~(0xffffffff << k));
-				bits -= FLAC__BITS_PER_BLURB;
-			}
-		}
-		else if(bits <= n) {
-			bb->buffer[bb->blurbs] <<= bits;
-			bb->buffer[bb->blurbs] |= val;
-			if(bits == n) {
-				bb->blurbs++;
-				bb->bits = 0;
-			}
-			else
-				bb->bits += bits;
-			break;
-		}
-		else {
-			k = bits - n;
-			bb->buffer[bb->blurbs] <<= n;
-			bb->buffer[bb->blurbs] |= (val >> k);
-			/* we know n > 0 so k < 32 so no need to protect against the gcc bug mentioned above */
-			val &= (~(0xffffffff << k));
-			bits -= n;
-			bb->blurbs++;
-			bb->bits = 0;
-		}
-	}
-
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_write_raw_int32(FLAC__BitBuffer *bb, FLAC__int32 val, unsigned bits)
-{
-	return FLAC__bitbuffer_write_raw_uint32(bb, (FLAC__uint32)val, bits);
-}
-
-FLAC__bool FLAC__bitbuffer_write_raw_uint64(FLAC__BitBuffer *bb, FLAC__uint64 val, unsigned bits)
-{
-	static const FLAC__uint64 mask[] = {
-		0,
-		FLAC__U64L(0x0000000000000001), FLAC__U64L(0x0000000000000003), FLAC__U64L(0x0000000000000007), FLAC__U64L(0x000000000000000F),
-		FLAC__U64L(0x000000000000001F), FLAC__U64L(0x000000000000003F), FLAC__U64L(0x000000000000007F), FLAC__U64L(0x00000000000000FF),
-		FLAC__U64L(0x00000000000001FF), FLAC__U64L(0x00000000000003FF), FLAC__U64L(0x00000000000007FF), FLAC__U64L(0x0000000000000FFF),
-		FLAC__U64L(0x0000000000001FFF), FLAC__U64L(0x0000000000003FFF), FLAC__U64L(0x0000000000007FFF), FLAC__U64L(0x000000000000FFFF),
-		FLAC__U64L(0x000000000001FFFF), FLAC__U64L(0x000000000003FFFF), FLAC__U64L(0x000000000007FFFF), FLAC__U64L(0x00000000000FFFFF),
-		FLAC__U64L(0x00000000001FFFFF), FLAC__U64L(0x00000000003FFFFF), FLAC__U64L(0x00000000007FFFFF), FLAC__U64L(0x0000000000FFFFFF),
-		FLAC__U64L(0x0000000001FFFFFF), FLAC__U64L(0x0000000003FFFFFF), FLAC__U64L(0x0000000007FFFFFF), FLAC__U64L(0x000000000FFFFFFF),
-		FLAC__U64L(0x000000001FFFFFFF), FLAC__U64L(0x000000003FFFFFFF), FLAC__U64L(0x000000007FFFFFFF), FLAC__U64L(0x00000000FFFFFFFF),
-		FLAC__U64L(0x00000001FFFFFFFF), FLAC__U64L(0x00000003FFFFFFFF), FLAC__U64L(0x00000007FFFFFFFF), FLAC__U64L(0x0000000FFFFFFFFF),
-		FLAC__U64L(0x0000001FFFFFFFFF), FLAC__U64L(0x0000003FFFFFFFFF), FLAC__U64L(0x0000007FFFFFFFFF), FLAC__U64L(0x000000FFFFFFFFFF),
-		FLAC__U64L(0x000001FFFFFFFFFF), FLAC__U64L(0x000003FFFFFFFFFF), FLAC__U64L(0x000007FFFFFFFFFF), FLAC__U64L(0x00000FFFFFFFFFFF),
-		FLAC__U64L(0x00001FFFFFFFFFFF), FLAC__U64L(0x00003FFFFFFFFFFF), FLAC__U64L(0x00007FFFFFFFFFFF), FLAC__U64L(0x0000FFFFFFFFFFFF),
-		FLAC__U64L(0x0001FFFFFFFFFFFF), FLAC__U64L(0x0003FFFFFFFFFFFF), FLAC__U64L(0x0007FFFFFFFFFFFF), FLAC__U64L(0x000FFFFFFFFFFFFF),
-		FLAC__U64L(0x001FFFFFFFFFFFFF), FLAC__U64L(0x003FFFFFFFFFFFFF), FLAC__U64L(0x007FFFFFFFFFFFFF), FLAC__U64L(0x00FFFFFFFFFFFFFF),
-		FLAC__U64L(0x01FFFFFFFFFFFFFF), FLAC__U64L(0x03FFFFFFFFFFFFFF), FLAC__U64L(0x07FFFFFFFFFFFFFF), FLAC__U64L(0x0FFFFFFFFFFFFFFF),
-		FLAC__U64L(0x1FFFFFFFFFFFFFFF), FLAC__U64L(0x3FFFFFFFFFFFFFFF), FLAC__U64L(0x7FFFFFFFFFFFFFFF), FLAC__U64L(0xFFFFFFFFFFFFFFFF)
-	};
-	unsigned n, k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 64);
-	if(bits == 0)
-		return true;
-	if(!bitbuffer_ensure_size_(bb, bits))
-		return false;
-	val &= mask[bits];
-	bb->total_bits += bits;
-	while(bits > 0) {
-		if(bb->bits == 0) {
-			if(bits < FLAC__BITS_PER_BLURB) {
-				bb->buffer[bb->blurbs] = (FLAC__blurb)val;
-				bb->bits = bits;
-				break;
-			}
-			else if(bits == FLAC__BITS_PER_BLURB) {
-				bb->buffer[bb->blurbs++] = (FLAC__blurb)val;
-				break;
-			}
-			else {
-				k = bits - FLAC__BITS_PER_BLURB;
-				bb->buffer[bb->blurbs++] = (FLAC__blurb)(val >> k);
-				/* we know k < 64 so no need to protect against the gcc bug mentioned above */
-				val &= (~(FLAC__U64L(0xffffffffffffffff) << k));
-				bits -= FLAC__BITS_PER_BLURB;
-			}
-		}
-		else {
-			n = min(FLAC__BITS_PER_BLURB - bb->bits, bits);
-			k = bits - n;
-			bb->buffer[bb->blurbs] <<= n;
-			bb->buffer[bb->blurbs] |= (val >> k);
-			/* we know n > 0 so k < 64 so no need to protect against the gcc bug mentioned above */
-			val &= (~(FLAC__U64L(0xffffffffffffffff) << k));
-			bits -= n;
-			bb->bits += n;
-			if(bb->bits == FLAC__BITS_PER_BLURB) {
-				bb->blurbs++;
-				bb->bits = 0;
-			}
-		}
-	}
-
-	return true;
-}
-
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_raw_int64(FLAC__BitBuffer *bb, FLAC__int64 val, unsigned bits)
-{
-	return FLAC__bitbuffer_write_raw_uint64(bb, (FLAC__uint64)val, bits);
-}
-#endif
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_write_raw_uint32_little_endian(FLAC__BitBuffer *bb, FLAC__uint32 val)
-{
-	/* this doesn't need to be that fast as currently it is only used for vorbis comments */
-
-	/* NOTE: we rely on the fact that FLAC__bitbuffer_write_raw_uint32() masks out the unused bits */
-	if(!FLAC__bitbuffer_write_raw_uint32(bb, val, 8))
-		return false;
-	if(!FLAC__bitbuffer_write_raw_uint32(bb, val>>8, 8))
-		return false;
-	if(!FLAC__bitbuffer_write_raw_uint32(bb, val>>16, 8))
-		return false;
-	if(!FLAC__bitbuffer_write_raw_uint32(bb, val>>24, 8))
-		return false;
-
-	return true;
-}
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_write_byte_block(FLAC__BitBuffer *bb, const FLAC__byte vals[], unsigned nvals)
-{
-	unsigned i;
-
-	/* this could be faster but currently we don't need it to be */
-	for(i = 0; i < nvals; i++) {
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, (FLAC__uint32)(vals[i]), 8))
-			return false;
-	}
-
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_write_unary_unsigned(FLAC__BitBuffer *bb, unsigned val)
-{
-	if(val < 32)
-		return FLAC__bitbuffer_write_raw_uint32(bb, 1, ++val);
-	else if(val < 64)
-		return FLAC__bitbuffer_write_raw_uint64(bb, 1, ++val);
-	else {
-		if(!FLAC__bitbuffer_write_zeroes(bb, val))
-			return false;
-		return FLAC__bitbuffer_write_raw_uint32(bb, 1, 1);
-	}
-}
-
-unsigned FLAC__bitbuffer_rice_bits(int val, unsigned parameter)
-{
-	unsigned msbs, uval;
-
-	/* fold signed to unsigned */
-	if(val < 0)
-		/* equivalent to
-		 *     (unsigned)(((--val) << 1) - 1);
-		 * but without the overflow problem at MININT
-		 */
-		uval = (unsigned)(((-(++val)) << 1) + 1);
-	else
-		uval = (unsigned)(val << 1);
-
-	msbs = uval >> parameter;
-
-	return 1 + parameter + msbs;
-}
-
-#if 0 /* UNUSED */
-unsigned FLAC__bitbuffer_golomb_bits_signed(int val, unsigned parameter)
-{
-	unsigned bits, msbs, uval;
-	unsigned k;
-
-	FLAC__ASSERT(parameter > 0);
-
-	/* fold signed to unsigned */
-	if(val < 0)
-		/* equivalent to
-		 *     (unsigned)(((--val) << 1) - 1);
-		 * but without the overflow problem at MININT
-		 */
-		uval = (unsigned)(((-(++val)) << 1) + 1);
-	else
-		uval = (unsigned)(val << 1);
-
-	k = FLAC__bitmath_ilog2(parameter);
-	if(parameter == 1u<<k) {
-		FLAC__ASSERT(k <= 30);
-
-		msbs = uval >> k;
-		bits = 1 + k + msbs;
-	}
-	else {
-		unsigned q, r, d;
-
-		d = (1 << (k+1)) - parameter;
-		q = uval / parameter;
-		r = uval - (q * parameter);
-
-		bits = 1 + q + k;
-		if(r >= d)
-			bits++;
-	}
-	return bits;
-}
-
-unsigned FLAC__bitbuffer_golomb_bits_unsigned(unsigned uval, unsigned parameter)
-{
-	unsigned bits, msbs;
-	unsigned k;
-
-	FLAC__ASSERT(parameter > 0);
-
-	k = FLAC__bitmath_ilog2(parameter);
-	if(parameter == 1u<<k) {
-		FLAC__ASSERT(k <= 30);
-
-		msbs = uval >> k;
-		bits = 1 + k + msbs;
-	}
-	else {
-		unsigned q, r, d;
-
-		d = (1 << (k+1)) - parameter;
-		q = uval / parameter;
-		r = uval - (q * parameter);
-
-		bits = 1 + q + k;
-		if(r >= d)
-			bits++;
-	}
-	return bits;
-}
-#endif /* UNUSED */
-
-FLAC__bool FLAC__bitbuffer_write_rice_signed(FLAC__BitBuffer *bb, int val, unsigned parameter)
-{
-	unsigned total_bits, interesting_bits, msbs, uval;
-	FLAC__uint32 pattern;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter <= 30);
-
-	/* fold signed to unsigned */
-	if(val < 0)
-		/* equivalent to
-		 *     (unsigned)(((--val) << 1) - 1);
-		 * but without the overflow problem at MININT
-		 */
-		uval = (unsigned)(((-(++val)) << 1) + 1);
-	else
-		uval = (unsigned)(val << 1);
-
-	msbs = uval >> parameter;
-	interesting_bits = 1 + parameter;
-	total_bits = interesting_bits + msbs;
-	pattern = 1 << parameter; /* the unary end bit */
-	pattern |= (uval & ((1<<parameter)-1)); /* the binary LSBs */
-
-	if(total_bits <= 32) {
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, total_bits))
-			return false;
-	}
-	else {
-		/* write the unary MSBs */
-		if(!FLAC__bitbuffer_write_zeroes(bb, msbs))
-			return false;
-		/* write the unary end bit and binary LSBs */
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, interesting_bits))
-			return false;
-	}
-	return true;
-}
-
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_rice_signed_guarded(FLAC__BitBuffer *bb, int val, unsigned parameter, unsigned max_bits, FLAC__bool *overflow)
-{
-	unsigned total_bits, interesting_bits, msbs, uval;
-	FLAC__uint32 pattern;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter <= 30);
-
-	*overflow = false;
-
-	/* fold signed to unsigned */
-	if(val < 0)
-		/* equivalent to
-		 *     (unsigned)(((--val) << 1) - 1);
-		 * but without the overflow problem at MININT
-		 */
-		uval = (unsigned)(((-(++val)) << 1) + 1);
-	else
-		uval = (unsigned)(val << 1);
-
-	msbs = uval >> parameter;
-	interesting_bits = 1 + parameter;
-	total_bits = interesting_bits + msbs;
-	pattern = 1 << parameter; /* the unary end bit */
-	pattern |= (uval & ((1<<parameter)-1)); /* the binary LSBs */
-
-	if(total_bits <= 32) {
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, total_bits))
-			return false;
-	}
-	else if(total_bits > max_bits) {
-		*overflow = true;
-		return true;
-	}
-	else {
-		/* write the unary MSBs */
-		if(!FLAC__bitbuffer_write_zeroes(bb, msbs))
-			return false;
-		/* write the unary end bit and binary LSBs */
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, interesting_bits))
-			return false;
-	}
-	return true;
-}
-#endif /* UNUSED */
-
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_golomb_signed(FLAC__BitBuffer *bb, int val, unsigned parameter)
-{
-	unsigned total_bits, msbs, uval;
-	unsigned k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter > 0);
-
-	/* fold signed to unsigned */
-	if(val < 0)
-		/* equivalent to
-		 *     (unsigned)(((--val) << 1) - 1);
-		 * but without the overflow problem at MININT
-		 */
-		uval = (unsigned)(((-(++val)) << 1) + 1);
-	else
-		uval = (unsigned)(val << 1);
-
-	k = FLAC__bitmath_ilog2(parameter);
-	if(parameter == 1u<<k) {
-		unsigned pattern;
-
-		FLAC__ASSERT(k <= 30);
-
-		msbs = uval >> k;
-		total_bits = 1 + k + msbs;
-		pattern = 1 << k; /* the unary end bit */
-		pattern |= (uval & ((1u<<k)-1)); /* the binary LSBs */
-
-		if(total_bits <= 32) {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, total_bits))
-				return false;
-		}
-		else {
-			/* write the unary MSBs */
-			if(!FLAC__bitbuffer_write_zeroes(bb, msbs))
-				return false;
-			/* write the unary end bit and binary LSBs */
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, k+1))
-				return false;
-		}
-	}
-	else {
-		unsigned q, r, d;
-
-		d = (1 << (k+1)) - parameter;
-		q = uval / parameter;
-		r = uval - (q * parameter);
-		/* write the unary MSBs */
-		if(!FLAC__bitbuffer_write_zeroes(bb, q))
-			return false;
-		/* write the unary end bit */
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, 1, 1))
-			return false;
-		/* write the binary LSBs */
-		if(r >= d) {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, r+d, k+1))
-				return false;
-		}
-		else {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, r, k))
-				return false;
-		}
-	}
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_write_golomb_unsigned(FLAC__BitBuffer *bb, unsigned uval, unsigned parameter)
-{
-	unsigned total_bits, msbs;
-	unsigned k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter > 0);
-
-	k = FLAC__bitmath_ilog2(parameter);
-	if(parameter == 1u<<k) {
-		unsigned pattern;
-
-		FLAC__ASSERT(k <= 30);
-
-		msbs = uval >> k;
-		total_bits = 1 + k + msbs;
-		pattern = 1 << k; /* the unary end bit */
-		pattern |= (uval & ((1u<<k)-1)); /* the binary LSBs */
-
-		if(total_bits <= 32) {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, total_bits))
-				return false;
-		}
-		else {
-			/* write the unary MSBs */
-			if(!FLAC__bitbuffer_write_zeroes(bb, msbs))
-				return false;
-			/* write the unary end bit and binary LSBs */
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, pattern, k+1))
-				return false;
-		}
-	}
-	else {
-		unsigned q, r, d;
-
-		d = (1 << (k+1)) - parameter;
-		q = uval / parameter;
-		r = uval - (q * parameter);
-		/* write the unary MSBs */
-		if(!FLAC__bitbuffer_write_zeroes(bb, q))
-			return false;
-		/* write the unary end bit */
-		if(!FLAC__bitbuffer_write_raw_uint32(bb, 1, 1))
-			return false;
-		/* write the binary LSBs */
-		if(r >= d) {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, r+d, k+1))
-				return false;
-		}
-		else {
-			if(!FLAC__bitbuffer_write_raw_uint32(bb, r, k))
-				return false;
-		}
-	}
-	return true;
-}
-#endif /* UNUSED */
-
-FLAC__bool FLAC__bitbuffer_write_utf8_uint32(FLAC__BitBuffer *bb, FLAC__uint32 val)
-{
-	FLAC__bool ok = 1;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(!(val & 0x80000000)); /* this version only handles 31 bits */
-
-	if(val < 0x80) {
-		return FLAC__bitbuffer_write_raw_uint32(bb, val, 8);
-	}
-	else if(val < 0x800) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xC0 | (val>>6), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (val&0x3F), 8);
-	}
-	else if(val < 0x10000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xE0 | (val>>12), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (val&0x3F), 8);
-	}
-	else if(val < 0x200000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xF0 | (val>>18), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (val&0x3F), 8);
-	}
-	else if(val < 0x4000000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xF8 | (val>>24), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>18)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (val&0x3F), 8);
-	}
-	else {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xFC | (val>>30), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>24)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>18)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (val&0x3F), 8);
-	}
-
-	return ok;
-}
-
-FLAC__bool FLAC__bitbuffer_write_utf8_uint64(FLAC__BitBuffer *bb, FLAC__uint64 val)
-{
-	FLAC__bool ok = 1;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(!(val & FLAC__U64L(0xFFFFFFF000000000))); /* this version only handles 36 bits */
-
-	if(val < 0x80) {
-		return FLAC__bitbuffer_write_raw_uint32(bb, (FLAC__uint32)val, 8);
-	}
-	else if(val < 0x800) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xC0 | (FLAC__uint32)(val>>6), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-	else if(val < 0x10000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xE0 | (FLAC__uint32)(val>>12), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-	else if(val < 0x200000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xF0 | (FLAC__uint32)(val>>18), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-	else if(val < 0x4000000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xF8 | (FLAC__uint32)(val>>24), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-	else if(val < 0x80000000) {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xFC | (FLAC__uint32)(val>>30), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-	else {
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0xFE, 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>30)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitbuffer_write_raw_uint32(bb, 0x80 | (FLAC__uint32)(val&0x3F), 8);
-	}
-
-	return ok;
-}
-
-FLAC__bool FLAC__bitbuffer_zero_pad_to_byte_boundary(FLAC__BitBuffer *bb)
-{
-	/* 0-pad to byte boundary */
-	if(bb->bits & 7u)
-		return FLAC__bitbuffer_write_zeroes(bb, 8 - (bb->bits & 7u));
-	else
-		return true;
-}
-
-FLAC__bool FLAC__bitbuffer_peek_bit(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	/* to avoid a drastic speed penalty we don't:
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(bb->bits == 0);
-	*/
-
-	while(1) {
-		if(bb->total_consumed_bits < bb->total_bits) {
-			*val = (bb->buffer[bb->consumed_blurbs] & BLURB_BIT_TO_MASK(bb->consumed_bits))? 1 : 0;
-			return true;
-		}
-		else {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-		}
-	}
-}
-
-FLAC__bool FLAC__bitbuffer_read_bit(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	/* to avoid a drastic speed penalty we don't:
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(bb->bits == 0);
-	*/
-
-	while(1) {
-		if(bb->total_consumed_bits < bb->total_bits) {
-			*val = (bb->buffer[bb->consumed_blurbs] & BLURB_BIT_TO_MASK(bb->consumed_bits))? 1 : 0;
-			bb->consumed_bits++;
-			if(bb->consumed_bits == FLAC__BITS_PER_BLURB) {
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-			}
-			bb->total_consumed_bits++;
-			return true;
-		}
-		else {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-		}
-	}
-}
-
-FLAC__bool FLAC__bitbuffer_read_bit_to_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	/* to avoid a drastic speed penalty we don't:
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(bb->bits == 0);
-	*/
-
-	while(1) {
-		if(bb->total_consumed_bits < bb->total_bits) {
-			*val <<= 1;
-			*val |= (bb->buffer[bb->consumed_blurbs] & BLURB_BIT_TO_MASK(bb->consumed_bits))? 1 : 0;
-			bb->consumed_bits++;
-			if(bb->consumed_bits == FLAC__BITS_PER_BLURB) {
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-			}
-			bb->total_consumed_bits++;
-			return true;
-		}
-		else {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-		}
-	}
-}
-
-FLAC__bool FLAC__bitbuffer_read_bit_to_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	/* to avoid a drastic speed penalty we don't:
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(bb->bits == 0);
-	*/
-
-	while(1) {
-		if(bb->total_consumed_bits < bb->total_bits) {
-			*val <<= 1;
-			*val |= (bb->buffer[bb->consumed_blurbs] & BLURB_BIT_TO_MASK(bb->consumed_bits))? 1 : 0;
-			bb->consumed_bits++;
-			if(bb->consumed_bits == FLAC__BITS_PER_BLURB) {
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-			}
-			bb->total_consumed_bits++;
-			return true;
-		}
-		else {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-		}
-	}
-}
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_read_raw_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__NO_MANUAL_INLINING
-{
-	unsigned i;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 32);
-
-	*val = 0;
-	for(i = 0; i < bits; i++) {
-		if(!FLAC__bitbuffer_read_bit_to_uint32(bb, val, read_callback, client_data))
-			return false;
-	}
-	return true;
-}
-#else
-{
-	unsigned i, bits_ = bits;
-	FLAC__uint32 v = 0;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 32);
-	FLAC__ASSERT((bb->capacity*FLAC__BITS_PER_BLURB) * 2 >= bits);
-
-	if(bits == 0) {
-		*val = 0;
-		return true;
-	}
-
-	while(bb->total_consumed_bits + bits > bb->total_bits) {
-		if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-			return false;
-	}
-#if FLAC__BITS_PER_BLURB > 8
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) { /*@@@ comment on why this is here*/
-#endif
-		if(bb->consumed_bits) {
-			i = FLAC__BITS_PER_BLURB - bb->consumed_bits;
-			if(i <= bits_) {
-				v = bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits);
-				bits_ -= i;
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-				/* we hold off updating bb->total_consumed_bits until the end */
-			}
-			else {
-				*val = (bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits)) >> (i-bits_);
-				bb->consumed_bits += bits_;
-				bb->total_consumed_bits += bits_;
-				return true;
-			}
-		}
-#if FLAC__BITS_PER_BLURB == 32
-		/* note that we know bits_ cannot be > 32 because of previous assertions */
-		if(bits_ == FLAC__BITS_PER_BLURB) {
-			v = bb->buffer[bb->consumed_blurbs];
-			CRC16_UPDATE_BLURB(bb, v, bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			bb->total_consumed_bits += bits;
-			*val = v;
-			return true;
-		}
-#else
-		while(bits_ >= FLAC__BITS_PER_BLURB) {
-			v <<= FLAC__BITS_PER_BLURB;
-			v |= bb->buffer[bb->consumed_blurbs];
-			bits_ -= FLAC__BITS_PER_BLURB;
-			CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-#endif
-		if(bits_ > 0) {
-			v <<= bits_;
-			v |= (bb->buffer[bb->consumed_blurbs] >> (FLAC__BITS_PER_BLURB-bits_));
-			bb->consumed_bits = bits_;
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		bb->total_consumed_bits += bits;
-		*val = v;
-#if FLAC__BITS_PER_BLURB > 8
-	}
-	else {
-		*val = 0;
-		for(i = 0; i < bits; i++) {
-			if(!FLAC__bitbuffer_read_bit_to_uint32(bb, val, read_callback, client_data))
-				return false;
-		}
-	}
-#endif
-	return true;
-}
-#endif
-
-FLAC__bool FLAC__bitbuffer_read_raw_int32(FLAC__BitBuffer *bb, FLAC__int32 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__NO_MANUAL_INLINING
-{
-	unsigned i;
-	FLAC__uint32 v;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 32);
-
-	if(bits == 0) {
-		*val = 0;
-		return true;
-	}
-
-	v = 0;
-	for(i = 0; i < bits; i++) {
-		if(!FLAC__bitbuffer_read_bit_to_uint32(bb, &v, read_callback, client_data))
-			return false;
-	}
-
-	/* fix the sign */
-	i = 32 - bits;
-	if(i) {
-		v <<= i;
-		*val = (FLAC__int32)v;
-		*val >>= i;
-	}
-	else
-		*val = (FLAC__int32)v;
-
-	return true;
-}
-#else
-{
-	unsigned i, bits_ = bits;
-	FLAC__uint32 v = 0;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 32);
-	FLAC__ASSERT((bb->capacity*FLAC__BITS_PER_BLURB) * 2 >= bits);
-
-	if(bits == 0) {
-		*val = 0;
-		return true;
-	}
-
-	while(bb->total_consumed_bits + bits > bb->total_bits) {
-		if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-			return false;
-	}
-#if FLAC__BITS_PER_BLURB > 8
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) { /*@@@ comment on why this is here*/
-#endif
-		if(bb->consumed_bits) {
-			i = FLAC__BITS_PER_BLURB - bb->consumed_bits;
-			if(i <= bits_) {
-				v = bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits);
-				bits_ -= i;
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-				/* we hold off updating bb->total_consumed_bits until the end */
-			}
-			else {
-				/* bits_ must be < FLAC__BITS_PER_BLURB-1 if we get to here */
-				v = (bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits));
-				v <<= (32-i);
-				*val = (FLAC__int32)v;
-				*val >>= (32-bits_);
-				bb->consumed_bits += bits_;
-				bb->total_consumed_bits += bits_;
-				return true;
-			}
-		}
-#if FLAC__BITS_PER_BLURB == 32
-		/* note that we know bits_ cannot be > 32 because of previous assertions */
-		if(bits_ == FLAC__BITS_PER_BLURB) {
-			v = bb->buffer[bb->consumed_blurbs];
-			bits_ = 0;
-			CRC16_UPDATE_BLURB(bb, v, bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-#else
-		while(bits_ >= FLAC__BITS_PER_BLURB) {
-			v <<= FLAC__BITS_PER_BLURB;
-			v |= bb->buffer[bb->consumed_blurbs];
-			bits_ -= FLAC__BITS_PER_BLURB;
-			CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-#endif
-		if(bits_ > 0) {
-			v <<= bits_;
-			v |= (bb->buffer[bb->consumed_blurbs] >> (FLAC__BITS_PER_BLURB-bits_));
-			bb->consumed_bits = bits_;
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		bb->total_consumed_bits += bits;
-#if FLAC__BITS_PER_BLURB > 8
-	}
-	else {
-		for(i = 0; i < bits; i++) {
-			if(!FLAC__bitbuffer_read_bit_to_uint32(bb, &v, read_callback, client_data))
-				return false;
-		}
-	}
-#endif
-
-	/* fix the sign */
-	i = 32 - bits;
-	if(i) {
-		v <<= i;
-		*val = (FLAC__int32)v;
-		*val >>= i;
-	}
-	else
-		*val = (FLAC__int32)v;
-
-	return true;
-}
-#endif
-
-FLAC__bool FLAC__bitbuffer_read_raw_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__NO_MANUAL_INLINING
-{
-	unsigned i;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 64);
-
-	*val = 0;
-	for(i = 0; i < bits; i++) {
-		if(!FLAC__bitbuffer_read_bit_to_uint64(bb, val, read_callback, client_data))
-			return false;
-	}
-	return true;
-}
-#else
-{
-	unsigned i, bits_ = bits;
-	FLAC__uint64 v = 0;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 64);
-	FLAC__ASSERT((bb->capacity*FLAC__BITS_PER_BLURB) * 2 >= bits);
-
-	if(bits == 0) {
-		*val = 0;
-		return true;
-	}
-
-	while(bb->total_consumed_bits + bits > bb->total_bits) {
-		if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-			return false;
-	}
-#if FLAC__BITS_PER_BLURB > 8
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) { /*@@@ comment on why this is here*/
-#endif
-		if(bb->consumed_bits) {
-			i = FLAC__BITS_PER_BLURB - bb->consumed_bits;
-			if(i <= bits_) {
-				v = bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits);
-				bits_ -= i;
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-				/* we hold off updating bb->total_consumed_bits until the end */
-			}
-			else {
-				*val = (bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits)) >> (i-bits_);
-				bb->consumed_bits += bits_;
-				bb->total_consumed_bits += bits_;
-				return true;
-			}
-		}
-		while(bits_ >= FLAC__BITS_PER_BLURB) {
-			v <<= FLAC__BITS_PER_BLURB;
-			v |= bb->buffer[bb->consumed_blurbs];
-			bits_ -= FLAC__BITS_PER_BLURB;
-			CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		if(bits_ > 0) {
-			v <<= bits_;
-			v |= (bb->buffer[bb->consumed_blurbs] >> (FLAC__BITS_PER_BLURB-bits_));
-			bb->consumed_bits = bits_;
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		bb->total_consumed_bits += bits;
-		*val = v;
-#if FLAC__BITS_PER_BLURB > 8
-	}
-	else {
-		*val = 0;
-		for(i = 0; i < bits; i++) {
-			if(!FLAC__bitbuffer_read_bit_to_uint64(bb, val, read_callback, client_data))
-				return false;
-		}
-	}
-#endif
-	return true;
-}
-#endif
-
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_read_raw_int64(FLAC__BitBuffer *bb, FLAC__int64 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__NO_MANUAL_INLINING
-{
-	unsigned i;
-	FLAC__uint64 v;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 64);
-
-	v = 0;
-	for(i = 0; i < bits; i++) {
-		if(!FLAC__bitbuffer_read_bit_to_uint64(bb, &v, read_callback, client_data))
-			return false;
-	}
-	/* fix the sign */
-	i = 64 - bits;
-	if(i) {
-		v <<= i;
-		*val = (FLAC__int64)v;
-		*val >>= i;
-	}
-	else
-		*val = (FLAC__int64)v;
-
-	return true;
-}
-#else
-{
-	unsigned i, bits_ = bits;
-	FLAC__uint64 v = 0;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	FLAC__ASSERT(bits <= 64);
-	FLAC__ASSERT((bb->capacity*FLAC__BITS_PER_BLURB) * 2 >= bits);
-
-	if(bits == 0) {
-		*val = 0;
-		return true;
-	}
-
-	while(bb->total_consumed_bits + bits > bb->total_bits) {
-		if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-			return false;
-	}
-#if FLAC__BITS_PER_BLURB > 8
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) { /*@@@ comment on why this is here*/
-#endif
-		if(bb->consumed_bits) {
-			i = FLAC__BITS_PER_BLURB - bb->consumed_bits;
-			if(i <= bits_) {
-				v = bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits);
-				bits_ -= i;
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-				/* we hold off updating bb->total_consumed_bits until the end */
-			}
-			else {
-				/* bits_ must be < FLAC__BITS_PER_BLURB-1 if we get to here */
-				v = (bb->buffer[bb->consumed_blurbs] & (FLAC__BLURB_ALL_ONES >> bb->consumed_bits));
-				v <<= (64-i);
-				*val = (FLAC__int64)v;
-				*val >>= (64-bits_);
-				bb->consumed_bits += bits_;
-				bb->total_consumed_bits += bits_;
-				return true;
-			}
-		}
-		while(bits_ >= FLAC__BITS_PER_BLURB) {
-			v <<= FLAC__BITS_PER_BLURB;
-			v |= bb->buffer[bb->consumed_blurbs];
-			bits_ -= FLAC__BITS_PER_BLURB;
-			CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-			bb->consumed_blurbs++;
-			/* bb->consumed_bits is already 0 */
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		if(bits_ > 0) {
-			v <<= bits_;
-			v |= (bb->buffer[bb->consumed_blurbs] >> (FLAC__BITS_PER_BLURB-bits_));
-			bb->consumed_bits = bits_;
-			/* we hold off updating bb->total_consumed_bits until the end */
-		}
-		bb->total_consumed_bits += bits;
-#if FLAC__BITS_PER_BLURB > 8
-	}
-	else {
-		for(i = 0; i < bits; i++) {
-			if(!FLAC__bitbuffer_read_bit_to_uint64(bb, &v, read_callback, client_data))
-				return false;
-		}
-	}
-#endif
-
-	/* fix the sign */
-	i = 64 - bits;
-	if(i) {
-		v <<= i;
-		*val = (FLAC__int64)v;
-		*val >>= i;
-	}
-	else
-		*val = (FLAC__int64)v;
-
-	return true;
-}
-#endif
-#endif
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_read_raw_uint32_little_endian(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	FLAC__uint32 x8, x32 = 0;
-
-	/* this doesn't need to be that fast as currently it is only used for vorbis comments */
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x32, 8, read_callback, client_data))
-		return false;
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x8, 8, read_callback, client_data))
-		return false;
-	x32 |= (x8 << 8);
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x8, 8, read_callback, client_data))
-		return false;
-	x32 |= (x8 << 16);
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x8, 8, read_callback, client_data))
-		return false;
-	x32 |= (x8 << 24);
-
-	*val = x32;
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_skip_bits_no_crc(FLAC__BitBuffer *bb, unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	/*
-	 * @@@ a slightly faster implementation is possible but
-	 * probably not that useful since this is only called a
-	 * couple of times in the metadata readers.
-	 */
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	if(bits > 0) {
-		const unsigned n = bb->consumed_bits & 7;
-	   	unsigned m;
-		FLAC__uint32 x;
-
-		if(n != 0) {
-			m = min(8-n, bits);
-			if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, m, read_callback, client_data))
-				return false;
-			bits -= m;
-		}
-		m = bits / 8;
-		if(m > 0) {
-			if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(bb, 0, m, read_callback, client_data))
-				return false;
-			bits %= 8;
-		}
-		if(bits > 0) {
-			if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, bits, read_callback, client_data))
-				return false;
-		}
-	}
-
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_read_byte_block_aligned_no_crc(FLAC__BitBuffer *bb, FLAC__byte *val, unsigned nvals, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(FLAC__bitbuffer_is_byte_aligned(bb));
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(bb));
-#if FLAC__BITS_PER_BLURB == 8
-	while(nvals > 0) {
-		unsigned chunk = min(nvals, bb->blurbs - bb->consumed_blurbs);
-		if(chunk == 0) {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-		}
-		else {
-			if(0 != val) {
-				memcpy(val, bb->buffer + bb->consumed_blurbs, FLAC__BYTES_PER_BLURB * chunk);
-				val += FLAC__BYTES_PER_BLURB * chunk;
-			}
-			nvals -= chunk;
-			bb->consumed_blurbs += chunk;
-			bb->total_consumed_bits = (bb->consumed_blurbs << FLAC__BITS_PER_BLURB_LOG2);
-		}
-	}
-#else
-	@@@ need to write this still
-	FLAC__ASSERT(0);
-#endif
-
-	return true;
-}
-
-FLaC__INLINE FLAC__bool FLAC__bitbuffer_read_unary_unsigned(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__NO_MANUAL_INLINING
-{
-	unsigned bit, val_ = 0;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	while(1) {
-		if(!FLAC__bitbuffer_read_bit(bb, &bit, read_callback, client_data))
-			return false;
-		if(bit)
-			break;
-		else
-			val_++;
-	}
-	*val = val_;
-	return true;
-}
-#else
-{
-	unsigned i, val_ = 0;
-	unsigned total_blurbs_ = (bb->total_bits + (FLAC__BITS_PER_BLURB-1)) / FLAC__BITS_PER_BLURB;
-	FLAC__blurb b;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-#if FLAC__BITS_PER_BLURB > 8
-	if(bb->bits == 0 || bb->consumed_blurbs < bb->blurbs) { /*@@@ comment on why this is here*/
-#endif
-		if(bb->consumed_bits) {
-			b = bb->buffer[bb->consumed_blurbs] << bb->consumed_bits;
-			if(b) {
-				for(i = 0; !(b & FLAC__BLURB_TOP_BIT_ONE); i++)
-					b <<= 1;
-				*val = i;
-				i++;
-				bb->consumed_bits += i;
-				bb->total_consumed_bits += i;
-				if(bb->consumed_bits == FLAC__BITS_PER_BLURB) {
-					CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-					bb->consumed_blurbs++;
-					bb->consumed_bits = 0;
-				}
-				return true;
-			}
-			else {
-				val_ = FLAC__BITS_PER_BLURB - bb->consumed_bits;
-				CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-				bb->consumed_blurbs++;
-				bb->consumed_bits = 0;
-				bb->total_consumed_bits += val_;
-			}
-		}
-		while(1) {
-			if(bb->consumed_blurbs >= total_blurbs_) {
-				if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-					return false;
-				total_blurbs_ = (bb->total_bits + (FLAC__BITS_PER_BLURB-1)) / FLAC__BITS_PER_BLURB;
-			}
-			b = bb->buffer[bb->consumed_blurbs];
-			if(b) {
-				for(i = 0; !(b & FLAC__BLURB_TOP_BIT_ONE); i++)
-					b <<= 1;
-				val_ += i;
-				i++;
-				bb->consumed_bits = i;
-				*val = val_;
-				if(i == FLAC__BITS_PER_BLURB) {
-					CRC16_UPDATE_BLURB(bb, bb->buffer[bb->consumed_blurbs], bb->read_crc16);
-					bb->consumed_blurbs++;
-					bb->consumed_bits = 0;
-				}
-				bb->total_consumed_bits += i;
-				return true;
-			}
-			else {
-				val_ += FLAC__BITS_PER_BLURB;
-				CRC16_UPDATE_BLURB(bb, 0, bb->read_crc16);
-				bb->consumed_blurbs++;
-				/* bb->consumed_bits is already 0 */
-				bb->total_consumed_bits += FLAC__BITS_PER_BLURB;
-			}
-		}
-#if FLAC__BITS_PER_BLURB > 8
-	}
-	else {
-		while(1) {
-			if(!FLAC__bitbuffer_read_bit(bb, &i, read_callback, client_data))
-				return false;
-			if(i)
-				break;
-			else
-				val_++;
-		}
-		*val = val_;
-		return true;
-	}
-#endif
-}
-#endif
-
-FLAC__bool FLAC__bitbuffer_read_rice_signed(FLAC__BitBuffer *bb, int *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	FLAC__uint32 lsbs = 0, msbs = 0;
-	unsigned uval;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter <= 31);
-
-	/* read the unary MSBs and end bit */
-	if(!FLAC__bitbuffer_read_unary_unsigned(bb, &msbs, read_callback, client_data))
-		return false;
-
-	/* read the binary LSBs */
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &lsbs, parameter, read_callback, client_data))
-		return false;
-
-	/* compose the value */
-	uval = (msbs << parameter) | lsbs;
-	if(uval & 1)
-		*val = -((int)(uval >> 1)) - 1;
-	else
-		*val = (int)(uval >> 1);
-
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_read_rice_signed_block(FLAC__BitBuffer *bb, int vals[], unsigned nvals, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-#ifdef FLAC__OLD_MSVC_FLAVOR
-{
-	const FLAC__blurb *buffer = bb->buffer;
-
-	unsigned i, j, val_i = 0;
-	unsigned cbits = 0, uval = 0, msbs = 0, lsbs_left = 0;
-	FLAC__blurb blurb, save_blurb;
-	unsigned state = 0; /* 0 = getting unary MSBs, 1 = getting binary LSBs */
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter <= 31);
-
-	if(nvals == 0)
-		return true;
-
-	i = bb->consumed_blurbs;
-	/*
-	 * We unroll the main loop to take care of partially consumed blurbs here.
-	 */
-	if(bb->consumed_bits > 0) {
-		save_blurb = blurb = buffer[i];
-		cbits = bb->consumed_bits;
-		blurb <<= cbits;
-
-		while(1) {
-			if(state == 0) {
-				if(blurb) {
-					for(j = 0; !(blurb & FLAC__BLURB_TOP_BIT_ONE); j++)
-						blurb <<= 1;
-					msbs += j;
-
-					/* dispose of the unary end bit */
-					blurb <<= 1;
-					j++;
-					cbits += j;
-
-					uval = 0;
-					lsbs_left = parameter;
-					state++;
-					if(cbits == FLAC__BITS_PER_BLURB) {
-						cbits = 0;
-						CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-						break;
-					}
-				}
-				else {
-					msbs += FLAC__BITS_PER_BLURB - cbits;
-					cbits = 0;
-					CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-					break;
-				}
-			}
-			else {
-				const unsigned available_bits = FLAC__BITS_PER_BLURB - cbits;
-				if(lsbs_left >= available_bits) {
-					uval <<= available_bits;
-					uval |= (blurb >> cbits);
-					cbits = 0;
-					CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-
-					if(lsbs_left == available_bits) {
-						/* compose the value */
-						uval |= (msbs << parameter);
-						if(uval & 1)
-							vals[val_i++] = -((int)(uval >> 1)) - 1;
-						else
-							vals[val_i++] = (int)(uval >> 1);
-						if(val_i == nvals)
-							break;
-
-						msbs = 0;
-						state = 0;
-					}
-
-					lsbs_left -= available_bits;
-					break;
-				}
-				else {
-					uval <<= lsbs_left;
-					uval |= (blurb >> (FLAC__BITS_PER_BLURB - lsbs_left));
-					blurb <<= lsbs_left;
-					cbits += lsbs_left;
-
-					/* compose the value */
-					uval |= (msbs << parameter);
-					if(uval & 1)
-						vals[val_i++] = -((int)(uval >> 1)) - 1;
-					else
-						vals[val_i++] = (int)(uval >> 1);
-					if(val_i == nvals) {
-						/* back up one if we exited the for loop because we read all nvals but the end came in the middle of a blurb */
-						i--;
-						break;
-					}
-
-					msbs = 0;
-					state = 0;
-				}
-			}
-		}
-		i++;
-
-		bb->consumed_blurbs = i;
-		bb->consumed_bits = cbits;
-		bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits;
-	}
-
-	/*
-	 * Now that we are blurb-aligned the logic is slightly simpler
-	 */
-	while(val_i < nvals) {
-		for( ; i < bb->blurbs && val_i < nvals; i++) {
-			save_blurb = blurb = buffer[i];
-			cbits = 0;
-			while(1) {
-				if(state == 0) {
-					if(blurb) {
-						for(j = 0; !(blurb & FLAC__BLURB_TOP_BIT_ONE); j++)
-							blurb <<= 1;
-						msbs += j;
-
-						/* dispose of the unary end bit */
-						blurb <<= 1;
-						j++;
-						cbits += j;
-
-						uval = 0;
-						lsbs_left = parameter;
-						state++;
-						if(cbits == FLAC__BITS_PER_BLURB) {
-							cbits = 0;
-							CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-							break;
-						}
-					}
-					else {
-						msbs += FLAC__BITS_PER_BLURB - cbits;
-						cbits = 0;
-						CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-						break;
-					}
-				}
-				else {
-					const unsigned available_bits = FLAC__BITS_PER_BLURB - cbits;
-					if(lsbs_left >= available_bits) {
-						uval <<= available_bits;
-						uval |= (blurb >> cbits);
-						cbits = 0;
-						CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-
-						if(lsbs_left == available_bits) {
-							/* compose the value */
-							uval |= (msbs << parameter);
-							if(uval & 1)
-								vals[val_i++] = -((int)(uval >> 1)) - 1;
-							else
-								vals[val_i++] = (int)(uval >> 1);
-							if(val_i == nvals)
-								break;
-
-							msbs = 0;
-							state = 0;
-						}
-
-						lsbs_left -= available_bits;
-						break;
-					}
-					else {
-						uval <<= lsbs_left;
-						uval |= (blurb >> (FLAC__BITS_PER_BLURB - lsbs_left));
-						blurb <<= lsbs_left;
-						cbits += lsbs_left;
-
-						/* compose the value */
-						uval |= (msbs << parameter);
-						if(uval & 1)
-							vals[val_i++] = -((int)(uval >> 1)) - 1;
-						else
-							vals[val_i++] = (int)(uval >> 1);
-						if(val_i == nvals) {
-							/* back up one if we exited the for loop because we read all nvals but the end came in the middle of a blurb */
-							i--;
-							break;
-						}
-
-						msbs = 0;
-						state = 0;
-					}
-				}
-			}
-		}
-		bb->consumed_blurbs = i;
-		bb->consumed_bits = cbits;
-		bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits;
-		if(val_i < nvals) {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-			/* these must be zero because we can only get here if we got to the end of the buffer */
-			FLAC__ASSERT(bb->consumed_blurbs == 0);
-			FLAC__ASSERT(bb->consumed_bits == 0);
-			i = 0;
-		}
-	}
-
-	return true;
-}
-#else
-{
-	const FLAC__blurb *buffer = bb->buffer;
-
-	unsigned i, j, val_i = nvals;
-	unsigned cbits = 0, uval = 0, msbs = 0, lsbs_left = 0;
-	FLAC__blurb blurb, save_blurb;
-	unsigned state = 0; /* 0 = getting unary MSBs, 1 = getting binary LSBs */
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-	FLAC__ASSERT(parameter <= 31);
-
-	if(nvals == 0)
-		return true;
-
-	cbits = bb->consumed_bits;
-	i = bb->consumed_blurbs;
-	while(val_i != 0) {
-		for( ; i < bb->blurbs; i++) {
-			blurb = (save_blurb = buffer[i]) << cbits;
-			while(1) {
-				if(state == 0) {
-					if(blurb) {
-						j = FLAC__ALIGNED_BLURB_UNARY(blurb);
-						msbs += j;
-						j++;
-						cbits += j;
-
-						uval = 0;
-						lsbs_left = parameter;
-						state++;
-						if(cbits == FLAC__BITS_PER_BLURB) {
-							cbits = 0;
-							CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-							break;
-						}
-						blurb <<= j;
-					}
-					else {
-						msbs += FLAC__BITS_PER_BLURB - cbits;
-						cbits = 0;
-						CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-						break;
-					}
-				}
-				else {
-					const unsigned available_bits = FLAC__BITS_PER_BLURB - cbits;
-					if(lsbs_left >= available_bits) {
-						uval <<= available_bits;
-						uval |= (blurb >> cbits);
-						cbits = 0;
-						CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16);
-
-						if(lsbs_left == available_bits) {
-							/* compose the value */
-							uval |= (msbs << parameter);
-							*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
-							--val_i;
-							if(val_i == 0) {
-								i++;
-								goto break2;
-							}
-							++vals;
-
-							msbs = 0;
-							state = 0;
-						}
-
-						lsbs_left -= available_bits;
-						break;
-					}
-					else {
-						cbits += lsbs_left;
-						uval <<= lsbs_left;
-						uval |= (blurb >> (FLAC__BITS_PER_BLURB - lsbs_left));
-						blurb <<= lsbs_left;
-
-						/* compose the value */
-						uval |= (msbs << parameter);
-						*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
-						--val_i;
-						if(val_i == 0)
-							goto break2;
-						++vals;
-
-						msbs = 0;
-						state = 0;
-					}
-				}
-			}
-		}
-break2:
-		bb->consumed_blurbs = i;
-		bb->consumed_bits = cbits;
-		bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits;
-		if(val_i != 0) {
-			if(!bitbuffer_read_from_client_(bb, read_callback, client_data))
-				return false;
-			/* these must be zero because we can only get here if we got to the end of the buffer */
-			FLAC__ASSERT(bb->consumed_blurbs == 0);
-			FLAC__ASSERT(bb->consumed_bits == 0);
-			i = 0;
-		}
-	}
-
-	return true;
-}
-#endif
-
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_read_golomb_signed(FLAC__BitBuffer *bb, int *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	FLAC__uint32 lsbs = 0, msbs = 0;
-	unsigned bit, uval, k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	k = FLAC__bitmath_ilog2(parameter);
-
-	/* read the unary MSBs and end bit */
-	if(!FLAC__bitbuffer_read_unary_unsigned(bb, &msbs, read_callback, client_data))
-		return false;
-
-	/* read the binary LSBs */
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &lsbs, k, read_callback, client_data))
-		return false;
-
-	if(parameter == 1u<<k) {
-		/* compose the value */
-		uval = (msbs << k) | lsbs;
-	}
-	else {
-		unsigned d = (1 << (k+1)) - parameter;
-		if(lsbs >= d) {
-			if(!FLAC__bitbuffer_read_bit(bb, &bit, read_callback, client_data))
-				return false;
-			lsbs <<= 1;
-			lsbs |= bit;
-			lsbs -= d;
-		}
-		/* compose the value */
-		uval = msbs * parameter + lsbs;
-	}
-
-	/* unfold unsigned to signed */
-	if(uval & 1)
-		*val = -((int)(uval >> 1)) - 1;
-	else
-		*val = (int)(uval >> 1);
-
-	return true;
-}
-
-FLAC__bool FLAC__bitbuffer_read_golomb_unsigned(FLAC__BitBuffer *bb, unsigned *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data)
-{
-	FLAC__uint32 lsbs, msbs = 0;
-	unsigned bit, k;
-
-	FLAC__ASSERT(0 != bb);
-	FLAC__ASSERT(0 != bb->buffer);
-
-	k = FLAC__bitmath_ilog2(parameter);
-
-	/* read the unary MSBs and end bit */
-	if(!FLAC__bitbuffer_read_unary_unsigned(bb, &msbs, read_callback, client_data))
-		return false;
-
-	/* read the binary LSBs */
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &lsbs, k, read_callback, client_data))
-		return false;
-
-	if(parameter == 1u<<k) {
-		/* compose the value */
-		*val = (msbs << k) | lsbs;
-	}
-	else {
-		unsigned d = (1 << (k+1)) - parameter;
-		if(lsbs >= d) {
-			if(!FLAC__bitbuffer_read_bit(bb, &bit, read_callback, client_data))
-				return false;
-			lsbs <<= 1;
-			lsbs |= bit;
-			lsbs -= d;
-		}
-		/* compose the value */
-		*val = msbs * parameter + lsbs;
-	}
-
-	return true;
-}
-#endif /* UNUSED */
-
-/* on return, if *val == 0xffffffff then the utf-8 sequence was invalid, but the return value will be true */
-FLAC__bool FLAC__bitbuffer_read_utf8_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data, FLAC__byte *raw, unsigned *rawlen)
-{
-	FLAC__uint32 v = 0;
-	FLAC__uint32 x;
-	unsigned i;
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, 8, read_callback, client_data))
-		return false;
-	if(raw)
-		raw[(*rawlen)++] = (FLAC__byte)x;
-	if(!(x & 0x80)) { /* 0xxxxxxx */
-		v = x;
-		i = 0;
-	}
-	else if(x & 0xC0 && !(x & 0x20)) { /* 110xxxxx */
-		v = x & 0x1F;
-		i = 1;
-	}
-	else if(x & 0xE0 && !(x & 0x10)) { /* 1110xxxx */
-		v = x & 0x0F;
-		i = 2;
-	}
-	else if(x & 0xF0 && !(x & 0x08)) { /* 11110xxx */
-		v = x & 0x07;
-		i = 3;
-	}
-	else if(x & 0xF8 && !(x & 0x04)) { /* 111110xx */
-		v = x & 0x03;
-		i = 4;
-	}
-	else if(x & 0xFC && !(x & 0x02)) { /* 1111110x */
-		v = x & 0x01;
-		i = 5;
-	}
-	else {
-		*val = 0xffffffff;
-		return true;
-	}
-	for( ; i; i--) {
-		if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, 8, read_callback, client_data))
-			return false;
-		if(raw)
-			raw[(*rawlen)++] = (FLAC__byte)x;
-		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
-			*val = 0xffffffff;
-			return true;
-		}
-		v <<= 6;
-		v |= (x & 0x3F);
-	}
-	*val = v;
-	return true;
-}
-
-/* on return, if *val == 0xffffffffffffffff then the utf-8 sequence was invalid, but the return value will be true */
-FLAC__bool FLAC__bitbuffer_read_utf8_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data, FLAC__byte *raw, unsigned *rawlen)
-{
-	FLAC__uint64 v = 0;
-	FLAC__uint32 x;
-	unsigned i;
-
-	if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, 8, read_callback, client_data))
-		return false;
-	if(raw)
-		raw[(*rawlen)++] = (FLAC__byte)x;
-	if(!(x & 0x80)) { /* 0xxxxxxx */
-		v = x;
-		i = 0;
-	}
-	else if(x & 0xC0 && !(x & 0x20)) { /* 110xxxxx */
-		v = x & 0x1F;
-		i = 1;
-	}
-	else if(x & 0xE0 && !(x & 0x10)) { /* 1110xxxx */
-		v = x & 0x0F;
-		i = 2;
-	}
-	else if(x & 0xF0 && !(x & 0x08)) { /* 11110xxx */
-		v = x & 0x07;
-		i = 3;
-	}
-	else if(x & 0xF8 && !(x & 0x04)) { /* 111110xx */
-		v = x & 0x03;
-		i = 4;
-	}
-	else if(x & 0xFC && !(x & 0x02)) { /* 1111110x */
-		v = x & 0x01;
-		i = 5;
-	}
-	else if(x & 0xFE && !(x & 0x01)) { /* 11111110 */
-		v = 0;
-		i = 6;
-	}
-	else {
-		*val = FLAC__U64L(0xffffffffffffffff);
-		return true;
-	}
-	for( ; i; i--) {
-		if(!FLAC__bitbuffer_read_raw_uint32(bb, &x, 8, read_callback, client_data))
-			return false;
-		if(raw)
-			raw[(*rawlen)++] = (FLAC__byte)x;
-		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
-			*val = FLAC__U64L(0xffffffffffffffff);
-			return true;
-		}
-		v <<= 6;
-		v |= (x & 0x3F);
-	}
-	*val = v;
-	return true;
-}
-
-void FLAC__bitbuffer_dump(const FLAC__BitBuffer *bb, FILE *out)
-{
-	unsigned i, j;
-	if(bb == 0) {
-		fprintf(out, "bitbuffer is NULL\n");
-	}
-	else {
-		fprintf(out, "bitbuffer: capacity=%u blurbs=%u bits=%u total_bits=%u consumed: blurbs=%u, bits=%u, total_bits=%u\n", bb->capacity, bb->blurbs, bb->bits, bb->total_bits, bb->consumed_blurbs, bb->consumed_bits, bb->total_consumed_bits);
-
-		for(i = 0; i < bb->blurbs; i++) {
-			fprintf(out, "%08X: ", i);
-			for(j = 0; j < FLAC__BITS_PER_BLURB; j++)
-				if(i*FLAC__BITS_PER_BLURB+j < bb->total_consumed_bits)
-					fprintf(out, ".");
-				else
-					fprintf(out, "%01u", bb->buffer[i] & (1 << (FLAC__BITS_PER_BLURB-j-1)) ? 1:0);
-			fprintf(out, "\n");
-		}
-		if(bb->bits > 0) {
-			fprintf(out, "%08X: ", i);
-			for(j = 0; j < bb->bits; j++)
-				if(i*FLAC__BITS_PER_BLURB+j < bb->total_consumed_bits)
-					fprintf(out, ".");
-				else
-					fprintf(out, "%01u", bb->buffer[i] & (1 << (bb->bits-j-1)) ? 1:0);
-			fprintf(out, "\n");
-		}
-	}
-}
diff --git a/FLAC/bitmath.c b/FLAC/bitmath.c
index e63ef1175a..8cf3c17b10 100644
--- a/FLAC/bitmath.c
+++ b/FLAC/bitmath.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,6 +29,10 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include "private/bitmath.h"
 #include "FLAC/assert.h"
 
diff --git a/FLAC/bitreader.c b/FLAC/bitreader.c
new file mode 100644
index 0000000000..7d63e526e0
--- /dev/null
+++ b/FLAC/bitreader.c
@@ -0,0 +1,1376 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdlib.h> /* for malloc() */
+#include <string.h> /* for memcpy(), memset() */
+#ifdef _MSC_VER
+#include <winsock.h> /* for ntohl() */
+#elif defined FLAC__SYS_DARWIN
+#include <machine/endian.h> /* for ntohl() */
+#elif defined __MINGW32__
+#include <winsock.h> /* for ntohl() */
+#else
+#include <netinet/in.h> /* for ntohl() */
+#endif
+#include "private/bitmath.h"
+#include "private/bitreader.h"
+#include "private/crc.h"
+#include "FLAC/assert.h"
+
+/* Things should be fastest when this matches the machine word size */
+/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS below to match */
+/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */
+/*           also, some sections currently only have fast versions for 4 or 8 bytes per word */
+typedef FLAC__uint32 brword;
+#define FLAC__BYTES_PER_WORD 4
+#define FLAC__BITS_PER_WORD 32
+#define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */
+#if WORDS_BIGENDIAN
+#define SWAP_BE_WORD_TO_HOST(x) (x)
+#else
+#ifdef _MSC_VER
+#define SWAP_BE_WORD_TO_HOST(x) local_swap32_(x)
+#else
+#define SWAP_BE_WORD_TO_HOST(x) ntohl(x)
+#endif
+#endif
+/* counts the # of zero MSBs in a 32-bit word via byte_to_unary_table; 'word' is evaluated several times, so it must have no side effects */
+#define COUNT_ZERO_MSBS(word) ( \
+	(word) <= 0xffff ? \
+		( (word) <= 0xff? byte_to_unary_table[word] + 24 : byte_to_unary_table[(word) >> 8] + 16 ) : \
+		( (word) <= 0xffffff? byte_to_unary_table[(word) >> 16] + 8 : byte_to_unary_table[(word) >> 24] ) \
+)
+/* this alternate might be slightly faster on some systems/compilers: */
+#define COUNT_ZERO_MSBS2(word) ( (word) <= 0xff ? byte_to_unary_table[word] + 24 : ((word) <= 0xffff ? byte_to_unary_table[(word) >> 8] + 16 : ((word) <= 0xffffff ? byte_to_unary_table[(word) >> 16] + 8 : byte_to_unary_table[(word) >> 24])) )
+
+
+/*
+ * This should be at least twice as large as the largest number of words
+ * required to represent any 'number' (in any encoding) you are going to
+ * read.  With FLAC this is on the order of maybe a few hundred bits.
+ * If the buffer is smaller than that, the decoder won't be able to read
+ * in a whole number that is in a variable length encoding (e.g. Rice).
+ * But to be practical it should be at least 1K bytes.
+ *
+ * Increase this number to decrease the number of read callbacks, at the
+ * expense of using more memory.  Or decrease for the reverse effect,
+ * keeping in mind the limit from the first paragraph.  The optimal size
+ * also depends on the CPU cache size and other factors; some twiddling
+ * may be necessary to squeeze out the best performance.
+ */
+static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
+
+static const unsigned char byte_to_unary_table[] = { /* indexed by a byte value 0..255: the number of zero bits above its highest set bit (8 for the value 0) */
+	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#ifdef min
+#undef min
+#endif
+#define min(x,y) ((x)<(y)?(x):(y))
+#ifdef max
+#undef max
+#endif
+#define max(x,y) ((x)>(y)?(x):(y))
+
+/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
+#ifdef _MSC_VER
+#define FLAC__U64L(x) x
+#else
+#define FLAC__U64L(x) x##LLU
+#endif
+
+#ifndef FLaC__INLINE
+#define FLaC__INLINE
+#endif
+
+/* WATCHOUT: assembly routines rely on the order in which these fields are declared */
+struct FLAC__BitReader {
+	/* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */
+	/* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */
+	brword *buffer; /* word storage; allocated by FLAC__bitreader_init(), released by FLAC__bitreader_free() */
+	unsigned capacity; /* in words */
+	unsigned words; /* # of completed words in buffer */
+	unsigned bytes; /* # of bytes in incomplete word at buffer[words] */
+	unsigned consumed_words; /* #words ... */
+	unsigned consumed_bits; /* ... + (#bits of head word) already consumed from the front of buffer */
+	unsigned read_crc16; /* the running frame CRC */
+	unsigned crc16_align; /* the number of bits in the current consumed word that should not be CRC'd */
+	FLAC__BitReaderReadCallback read_callback; /* supplies more input bytes when the buffer runs dry */
+	void *client_data; /* opaque pointer handed back to read_callback on every call */
+	FLAC__CPUInfo cpu_info; /* consulted when choosing a byte-swap routine after a refill */
+};
+
+#ifdef _MSC_VER
+/* OPT: an MSVC built-in would be better */
+static _inline FLAC__uint32 local_swap32_(FLAC__uint32 x)
+{
+	const FLAC__uint32 pairs_swapped = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); /* exchange the two bytes inside each 16-bit half */
+	return (pairs_swapped << 16) | (pairs_swapped >> 16); /* then exchange the halves, completing the 32-bit byte reversal */
+}
+static void local_swap32_block_(FLAC__uint32 *start, FLAC__uint32 len) /* byte-swaps 'len' consecutive 32-bit words in place, beginning at 'start' */
+{ /* edx walks the words and ecx counts down; 'test' sets ZF for len==0 and 'dec' refreshes it on each pass, so the 'jz' at loop1 exits once ecx reaches 0 */
+	__asm {
+		mov edx, start
+		mov ecx, len
+		test ecx, ecx
+loop1:
+		jz done1
+		mov eax, [edx]
+		bswap eax
+		mov [edx], eax
+		add edx, 4
+		dec ecx
+		jmp short loop1
+done1:
+	}
+}
+#endif
+
+static FLaC__INLINE void crc16_update_word_(FLAC__BitReader *br, brword word) /* folds the bytes of 'word' from bit offset br->crc16_align onward into br->read_crc16, most-significant byte first */
+{
+	register unsigned crc = br->read_crc16;
+#if FLAC__BYTES_PER_WORD == 4
+	switch(br->crc16_align) { /* enter at the first byte that still needs CRC'ing; every case deliberately falls through to the bytes below it */
+		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 24), crc);
+		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
+		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
+		case 24: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
+	}
+#elif FLAC__BYTES_PER_WORD == 8
+	switch(br->crc16_align) { /* same deliberate fall-through scheme as the 4-byte variant, extended to eight bytes */
+		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 56), crc);
+		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 48) & 0xff), crc);
+		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 40) & 0xff), crc);
+		case 24: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 32) & 0xff), crc);
+		case 32: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 24) & 0xff), crc);
+		case 40: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
+		case 48: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
+		case 56: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
+	}
+#else
+	for( ; br->crc16_align < FLAC__BITS_PER_WORD; br->crc16_align += 8) /* generic word size: walk the remaining bytes one at a time */
+		crc = FLAC__CRC16_UPDATE((unsigned)((word >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), crc);
+	br->read_crc16 = crc;
+#endif
+	br->crc16_align = 0; /* the next word starts with no bits excluded from the CRC */
+}
+
+/* refills the buffer: moves unconsumed data to the front, then asks read_callback for more bytes; returns false when the buffer has no room or the callback fails; would be static except it needs to be called by asm routines */
+FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
+{
+	unsigned start, end;
+	size_t bytes;
+	FLAC__byte *target;
+
+	/* first shift the unconsumed buffer data toward the front as much as possible */
+	if(br->consumed_words > 0) {
+		start = br->consumed_words;
+		end = br->words + (br->bytes? 1:0); /* one past the last word in use, counting a partial tail word */
+		memmove(br->buffer, br->buffer+start, FLAC__BYTES_PER_WORD * (end - start));
+
+		br->words -= start;
+		br->consumed_words = 0; /* consumed_bits is left alone: the partially consumed head word is now buffer[0] */
+	}
+
+	/*
+	 * set the target for reading, taking into account word alignment and endianness
+	 */
+	bytes = (br->capacity - br->words) * FLAC__BYTES_PER_WORD - br->bytes; /* free space in bytes; passed to the callback, which overwrites it with the count actually read */
+	if(bytes == 0)
+		return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY  */
+	target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes;
+
+	/* before reading, if the existing reader looks like this (say brword is 32 bits wide)
+	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1 (partial tail word is left-justified)
+	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??   (shown laid out as bytes sequentially in memory)
+	 *   buffer[LE]:  44 33 22 11 ?? ?? ?? 55   (?? being don't-care)
+	 *                               ^^-------target, bytes=3
+	 * on LE machines, have to byteswap the odd tail word so nothing is
+	 * overwritten:
+	 */
+#if WORDS_BIGENDIAN
+#else
+	if(br->bytes)
+		br->buffer[br->words] = SWAP_BE_WORD_TO_HOST(br->buffer[br->words]);
+#endif
+
+	/* now it looks like:
+	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1
+	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??
+	 *   buffer[LE]:  44 33 22 11 55 ?? ?? ??
+	 *                               ^^-------target, bytes=3
+	 */
+
+	/* read in the data; note that the callback may return a smaller number of bytes */
+	if(!br->read_callback(target, &bytes, br->client_data))
+		return false;
+
+	/* after reading bytes 66 77 88 99 AA BB CC DD EE FF from the client:
+	 *   bitstream :  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF
+	 *   buffer[BE]:  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF ??
+	 *   buffer[LE]:  44 33 22 11 55 66 77 88 99 AA BB CC DD EE FF ??
+	 * now have to byteswap on LE machines:
+	 */
+#if WORDS_BIGENDIAN
+#else
+	end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD; /* word count rounded up, so a new partial tail word is swapped too */
+# if defined(_MSC_VER) && (FLAC__BYTES_PER_WORD == 4)
+	if(br->cpu_info.type == FLAC__CPUINFO_TYPE_IA32 && br->cpu_info.data.ia32.bswap) {
+		start = br->words;
+		local_swap32_block_(br->buffer + start, end - start);
+	}
+	else
+# endif
+	for(start = br->words; start < end; start++)
+		br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]);
+#endif
+
+	/* now it looks like:
+	 *   bitstream :  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF
+	 *   buffer[BE]:  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF ??
+	 *   buffer[LE]:  44 33 22 11 88 77 66 55 CC BB AA 99 ?? FF EE DD
+	 * finally we'll update the reader values:
+	 */
+	end = br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes; /* 'end' is reused here as the total number of valid bytes in the buffer */
+	br->words = end / FLAC__BYTES_PER_WORD;
+	br->bytes = end % FLAC__BYTES_PER_WORD;
+
+	return true;
+}
+
+/***********************************************************************
+ *
+ * Class constructor/destructor
+ *
+ ***********************************************************************/
+
+FLAC__BitReader *FLAC__bitreader_new(void)
+{
+	/*
+	 * A single zero-filled allocation is all that is needed: calloc()
+	 * clears the storage, so the buffer pointer, the capacity, the
+	 * words/bytes fill counts, the consumed_words/consumed_bits position,
+	 * the read callback and the client data all begin life as zero
+	 * without any explicit assignment here.
+	 *
+	 * When the allocation fails, calloc()'s null result is handed straight
+	 * back to the caller.
+	 */
+	FLAC__BitReader * const fresh = (FLAC__BitReader*)calloc(1, sizeof(*fresh));
+	return fresh;
+}
+
+void FLAC__bitreader_delete(FLAC__BitReader *br)
+{
+	FLAC__ASSERT(0 != br);
+	/* two-step teardown: the word buffer the reader owns goes first, then the reader object itself */
+	FLAC__bitreader_free(br);
+	free(br);
+}
+
+/***********************************************************************
+ *
+ * Public class methods
+ *
+ ***********************************************************************/
+
+FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__CPUInfo cpu, FLAC__BitReaderReadCallback rcb, void *cd)
+{
+	FLAC__ASSERT(0 != br);
+
+	/* start with an empty reader of the default size: nothing buffered, nothing consumed */
+	br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY;
+	br->consumed_bits = br->consumed_words = 0;
+	br->bytes = br->words = 0;
+	br->buffer = (brword*)malloc(br->capacity * sizeof(brword));
+	if(!br->buffer)
+		return false; /* allocation failed; the callback, client data and CPU info are left untouched */
+	br->cpu_info = cpu;
+	br->client_data = cd;
+	br->read_callback = rcb;
+	return true;
+}
+
+void FLAC__bitreader_free(FLAC__BitReader *br)
+{
+	FLAC__ASSERT(0 != br);
+	/* free() accepts a null pointer, so the buffer can be released without a guard */
+	free(br->buffer);
+	/* zero the buffer bookkeeping and the callback hookup; the CRC fields and cpu_info are not touched */
+	br->client_data = 0;
+	br->read_callback = 0;
+	br->consumed_bits = br->consumed_words = 0;
+	br->bytes = br->words = 0;
+	br->capacity = 0;
+	br->buffer = 0;
+}
+
+FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br)
+{ /* forgets all buffered input and rewinds the read position; the buffer allocation is kept; always reports success */
+	br->consumed_bits = br->consumed_words = 0;
+	br->bytes = br->words = 0;
+	return true;
+}
+
+void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out)
+{ /* debugging aid: prints the reader's counters, then each buffered word as a row of bits with '.' in place of bits already consumed */
+	unsigned i, j;
+	if(br == 0) {
+		fprintf(out, "bitreader is NULL\n");
+	}
+	else {
+		fprintf(out, "bitreader: capacity=%u words=%u bytes=%u consumed: words=%u, bits=%u\n", br->capacity, br->words, br->bytes, br->consumed_words, br->consumed_bits);
+		/* the mask is built from (brword)1 so that selecting the top bit never shifts a signed int into its sign bit */
+		for(i = 0; i < br->words; i++) {
+			fprintf(out, "%08X: ", i);
+			for(j = 0; j < FLAC__BITS_PER_WORD; j++)
+				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
+					fprintf(out, ".");
+				else
+					fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1u:0u);
+			fprintf(out, "\n");
+		}
+		if(br->bytes > 0) { /* the incomplete tail word is left-justified, so its valid bits are the top br->bytes*8 bits of the word */
+			fprintf(out, "%08X: ", i);
+			for(j = 0; j < br->bytes*8; j++)
+				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
+					fprintf(out, ".");
+				else
+					fprintf(out, "%01u", br->buffer[i] & ((brword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1u:0u);
+			fprintf(out, "\n");
+		}
+	}
+}
+
+void FLAC__bitreader_reset_read_crc16(FLAC__BitReader *br, FLAC__uint16 seed)
+{
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	FLAC__ASSERT((br->consumed_bits & 7) == 0);
+	/* restart the running CRC at 'seed'; the bits of the head word consumed so far are marked as not to be CRC'd */
+	br->crc16_align = br->consumed_bits;
+	br->read_crc16 = (unsigned)seed;
+}
+
+FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br)
+{
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	FLAC__ASSERT((br->consumed_bits & 7) == 0);
+	FLAC__ASSERT(br->crc16_align <= br->consumed_bits);
+	/* fold in the already-consumed bytes of the head word that have not been CRC'd yet, then report the running CRC */
+	while(br->crc16_align < br->consumed_bits) {
+		const brword head = br->buffer[br->consumed_words];
+		const unsigned next_byte = (unsigned)((head >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff);
+		br->read_crc16 = FLAC__CRC16_UPDATE(next_byte, br->read_crc16);
+		br->crc16_align += 8;
+	}
+	return br->read_crc16;
+}
+
+FLaC__INLINE FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br)
+{
+	return !(br->consumed_bits % 8); /* true exactly when the read position falls on a byte boundary */
+}
+
+FLaC__INLINE unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
+{
+	return 8u - br->consumed_bits % 8u; /* in the range 1..8: an already-aligned reader reports 8, not 0 */
+}
+
+FLaC__INLINE unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
+{
+	return 8*br->bytes + FLAC__BITS_PER_WORD*(br->words-br->consumed_words) - br->consumed_bits; /* tail bytes + not-yet-finished words - bits already taken from the head word */
+}
+
+FLaC__INLINE FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits)
+{ /* reads the next 'bits' (0..32) bits, most-significant first, into the low bits of *val; returns false if the buffer could not be refilled far enough */
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	FLAC__ASSERT(bits <= 32);
+	FLAC__ASSERT((br->capacity*FLAC__BITS_PER_WORD) * 2 >= bits);
+	FLAC__ASSERT(br->consumed_words <= br->words);
+
+	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
+	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
+
+	if(bits == 0) { /* OPT: investigate if this can ever happen, maybe change to assertion */
+		*val = 0;
+		return true;
+	}
+
+	while((br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits < bits) { /* keep refilling until at least 'bits' unconsumed bits are buffered */
+		if(!bitreader_read_from_client_(br))
+			return false;
+	}
+	if(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
+		/* OPT: taking out the consumed_bits==0 "else" case below might make things faster if less code allows the compiler to inline this function */
+		if(br->consumed_bits) {
+			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+			const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits; /* bits still unread in the head word */
+			const brword word = br->buffer[br->consumed_words];
+			if(bits < n) { /* the request fits inside the head word with bits to spare */
+				*val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits);
+				br->consumed_bits += bits;
+				return true;
+			}
+			*val = word & (FLAC__WORD_ALL_ONES >> br->consumed_bits); /* take everything left in the head word */
+			bits -= n;
+			crc16_update_word_(br, word); /* the head word is now fully consumed, so it is CRC'd before moving on */
+			br->consumed_words++;
+			br->consumed_bits = 0;
+			if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
+				*val <<= bits;
+				*val |= (br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits));
+				br->consumed_bits = bits;
+			}
+			return true;
+		}
+		else {
+			const brword word = br->buffer[br->consumed_words];
+			if(bits < FLAC__BITS_PER_WORD) {
+				*val = word >> (FLAC__BITS_PER_WORD-bits);
+				br->consumed_bits = bits;
+				return true;
+			}
+			/* at this point 'bits' must be == FLAC__BITS_PER_WORD; because of previous assertions, it can't be larger */
+			*val = word;
+			crc16_update_word_(br, word);
+			br->consumed_words++;
+			return true;
+		}
+	}
+	else {
+		/* in this case we're starting our read at a partial tail word;
+		 * the reader has guaranteed that we have at least 'bits' bits
+		 * available to read, which makes this case simpler.
+		 */
+		/* OPT: taking out the consumed_bits==0 "else" case below might make things faster if less code allows the compiler to inline this function */
+		if(br->consumed_bits) {
+			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+			FLAC__ASSERT(br->consumed_bits + bits <= br->bytes*8);
+			*val = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits);
+			br->consumed_bits += bits;
+			return true;
+		}
+		else {
+			*val = br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits); /* the tail word is left-justified, so the wanted bits are at the top */
+			br->consumed_bits += bits;
+			return true;
+		}
+	}
+}
+
+FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, unsigned bits)
+{
+	/* reads 'bits' bits as a two's-complement number and widens it to 32 bits; *val is written only when the read succeeds */
+	FLAC__uint32 x;
+	if(!FLAC__bitreader_read_raw_uint32(br, &x, bits))
+		return false;
+	/* sign-extend with the XOR/subtract trick on unsigned values: no shift of a negative number, and no shift by 32 when bits==0 */
+	*val = bits? (FLAC__int32)((x ^ ((FLAC__uint32)1 << (bits-1))) - ((FLAC__uint32)1 << (bits-1))) : 0;
+	return true;
+}
+
+FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, unsigned bits)
+{
+	FLAC__uint32 upper, lower;
+
+	/* up to 32 bits: a single 32-bit read suffices */
+	if(bits <= 32) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &lower, bits))
+			return false;
+		*val = lower;
+		return true;
+	}
+
+	/* wider values are read as two pieces, most-significant first: the top bits-32 bits, then the low 32 */
+	if(!FLAC__bitreader_read_raw_uint32(br, &upper, bits-32))
+		return false;
+	if(!FLAC__bitreader_read_raw_uint32(br, &lower, 32))
+		return false;
+	*val = ((FLAC__uint64)upper << 32) | lower;
+	return true;
+}
+
+FLaC__INLINE FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val)
+{
+	/*
+	 * Reads four consecutive bytes and assembles them into a 32-bit value,
+	 * treating the first byte read as the least-significant one.
+	 *
+	 * *val is written only after all four bytes have been read; if any
+	 * read fails, false is returned and *val is left as it was.
+	 *
+	 * this doesn't need to be that fast as currently it is only used for vorbis comments
+	 */
+	FLAC__uint32 octet, assembled = 0;
+	unsigned shift;
+
+	/* each successive byte lands 8 bits higher than the one before it */
+	for(shift = 0; shift < 32; shift += 8) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &octet, 8))
+			return false;
+		assembled |= (octet << shift);
+	}
+
+	*val = assembled;
+	return true;
+}
+
+FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits)
+{
+	/*
+	 * OPT: a faster implementation is possible but probably not that useful
+	 * since this is only called a couple of times in the metadata readers.
+	 */
+	FLAC__uint32 discarded;
+	unsigned misaligned, whole_bytes;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	if(bits == 0)
+		return true;
+	/* phase 1: if the read position is inside a byte, consume up to the next byte boundary (or all of 'bits' if that is fewer) */
+	misaligned = br->consumed_bits % 8;
+	if(misaligned) {
+		const unsigned head = (8-misaligned < bits)? 8-misaligned : bits;
+		if(!FLAC__bitreader_read_raw_uint32(br, &discarded, head))
+			return false;
+		bits -= head;
+	}
+
+	/* phase 2: skip the whole bytes in one aligned block */
+	whole_bytes = bits / 8;
+	if(whole_bytes && !FLAC__bitreader_skip_byte_block_aligned_no_crc(br, whole_bytes))
+		return false;
+	bits %= 8; /* changes nothing when whole_bytes is 0, since bits is below 8 already */
+
+	/* phase 3: whatever is left is less than one byte */
+	if(bits && !FLAC__bitreader_read_raw_uint32(br, &discarded, bits))
+		return false;
+	return true;
+}
+
+FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, unsigned nvals)
+{
+	FLAC__uint32 ignored;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
+
+	/* head: drop single bytes until the read position sits on a word boundary (or nvals runs out) */
+	for( ; nvals && br->consumed_bits; nvals--) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &ignored, 8))
+			return false;
+	}
+	if(!nvals)
+		return true;
+
+	/* middle: step over complete words without looking at them, refilling the buffer whenever it is exhausted */
+	while(nvals >= FLAC__BYTES_PER_WORD) {
+		if(br->consumed_words >= br->words) {
+			if(!bitreader_read_from_client_(br))
+				return false;
+			continue;
+		}
+		nvals -= FLAC__BYTES_PER_WORD;
+		br->consumed_words++;
+	}
+
+	/* tail: fewer than one word's worth of bytes remain; drop them one at a time */
+	for( ; nvals; nvals--) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &ignored, 8))
+			return false;
+	}
+	return true;
+}
+
+FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, unsigned nvals)
+{ /* copies the next 'nvals' bytes of input into val[]; the read position must be byte-aligned on entry; returns false if a read or refill fails */
+	FLAC__uint32 x;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
+
+	/* step 1: read from partial head word to get word aligned */
+	while(nvals && br->consumed_bits) { /* i.e. run until we read 'nvals' bytes or we hit the end of the head word */
+		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+			return false;
+		*val++ = (FLAC__byte)x;
+		nvals--;
+	}
+	if(0 == nvals)
+		return true;
+	/* step 2: read whole words in chunks */
+	while(nvals >= FLAC__BYTES_PER_WORD) {
+		if(br->consumed_words < br->words) {
+			const brword word = br->buffer[br->consumed_words++]; /* consumed directly, without going through crc16_update_word_() */
+#if FLAC__BYTES_PER_WORD == 4
+			val[0] = (FLAC__byte)(word >> 24); /* most-significant byte of the word is written first */
+			val[1] = (FLAC__byte)(word >> 16);
+			val[2] = (FLAC__byte)(word >> 8);
+			val[3] = (FLAC__byte)word;
+#elif FLAC__BYTES_PER_WORD == 8
+			val[0] = (FLAC__byte)(word >> 56);
+			val[1] = (FLAC__byte)(word >> 48);
+			val[2] = (FLAC__byte)(word >> 40);
+			val[3] = (FLAC__byte)(word >> 32);
+			val[4] = (FLAC__byte)(word >> 24);
+			val[5] = (FLAC__byte)(word >> 16);
+			val[6] = (FLAC__byte)(word >> 8);
+			val[7] = (FLAC__byte)word;
+#else
+			for(x = 0; x < FLAC__BYTES_PER_WORD; x++) /* generic word size: same most-significant-first order */
+				val[x] = (FLAC__byte)(word >> (8*(FLAC__BYTES_PER_WORD-x-1)));
+#endif
+			val += FLAC__BYTES_PER_WORD;
+			nvals -= FLAC__BYTES_PER_WORD;
+		}
+		else if(!bitreader_read_from_client_(br)) /* no complete word buffered: refill, then retry the loop */
+			return false;
+	}
+	/* step 3: read any remainder from partial tail bytes */
+	while(nvals) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+			return false;
+		*val++ = (FLAC__byte)x;
+		nvals--;
+	}
+
+	return true;
+}
+
+FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val)
+#if 0 /* slow but readable version */
+{
+	unsigned bit;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	*val = 0;
+	while(1) {
+		if(!FLAC__bitreader_read_bit(br, &bit))
+			return false;
+		if(bit)
+			break;
+		else
+			(*val)++; /* bump the count, not the pointer: '*val++' would advance 'val' and leave the result at 0 */
+	}
+	return true;
+}
+#else
+{ /* Reads a unary-coded number: counts the zero bits before the next 1 bit, consumes that stop bit too, and stores the count in *val; returns false if the refill fails. */
+	unsigned i; /* number of leading zero bits found in the word currently being examined */
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	*val = 0;
+	while(1) {
+		while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
+			brword b = br->buffer[br->consumed_words] << br->consumed_bits; /* shift already-consumed bits out so the next unread bit is the MSB */
+			if(b) {
+				i = COUNT_ZERO_MSBS(b);
+				*val += i;
+				i++; /* also consume the stop bit */
+				br->consumed_bits += i;
+				if(br->consumed_bits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(br->consumed_bits == FLAC__BITS_PER_WORD) */
+					crc16_update_word_(br, br->buffer[br->consumed_words]);
+					br->consumed_words++;
+					br->consumed_bits = 0;
+				}
+				return true;
+			}
+			else {
+				*val += FLAC__BITS_PER_WORD - br->consumed_bits; /* the rest of this word is all zeros */
+				crc16_update_word_(br, br->buffer[br->consumed_words]);
+				br->consumed_words++;
+				br->consumed_bits = 0;
+				/* didn't find stop bit yet, have to keep going... */
+			}
+		}
+		/* at this point we've eaten up all the whole words; have to try
+		 * reading through any tail bytes before calling the read callback.
+		 * this is a repeat of the above logic adjusted for the fact we
+		 * don't have a whole word.  note though if the client is feeding
+		 * us data a byte at a time (unlikely), br->consumed_bits may not
+		 * be zero.
+		 */
+		if(br->bytes) {
+			const unsigned end = br->bytes * 8; /* number of valid bits in the partial tail word */
+			brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits; /* mask off bits past the valid tail bytes */
+			if(b) {
+				i = COUNT_ZERO_MSBS(b);
+				*val += i;
+				i++; /* also consume the stop bit */
+				br->consumed_bits += i;
+				FLAC__ASSERT(br->consumed_bits < FLAC__BITS_PER_WORD);
+				return true;
+			}
+			else {
+				*val += end - br->consumed_bits;
+				br->consumed_bits = end; /* every valid tail bit is now consumed; '+= end' double-counted bits consumed earlier and skipped input */
+				FLAC__ASSERT(br->consumed_bits < FLAC__BITS_PER_WORD);
+				/* didn't find stop bit yet, have to keep going... */
+			}
+		}
+		if(!bitreader_read_from_client_(br))
+			return false;
+	}
+}
+#endif
+
+FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter)
+{ /* Reads one Rice-coded signed value (unary MSBs, then 'parameter' binary LSBs) into *val; returns false if either read fails. */
+	FLAC__uint32 lsbs = 0, msbs = 0;
+	unsigned uval; /* folded (unsigned) form of the value before the sign is unfolded */
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	FLAC__ASSERT(parameter <= 31);
+
+	/* read the unary MSBs and end bit */
+	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
+		return false;
+
+	/* read the binary LSBs */
+	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, parameter))
+		return false;
+
+	/* compose the value */
+	uval = (msbs << parameter) | lsbs;
+	if(uval & 1) /* odd codes map to negatives: 1 -> -1, 3 -> -2, ... */
+		*val = -((int)(uval >> 1)) - 1;
+	else /* even codes map to non-negatives: 0 -> 0, 2 -> 1, ... */
+		*val = (int)(uval >> 1);
+
+	return true;
+}
+
+/* this is by far the most heavily used reader call.  it ain't pretty but it's fast */
+/* a lot of the logic is copied, then adapted, from FLAC__bitreader_read_unary_unsigned() and FLAC__bitreader_read_raw_uint32() */
+FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
+/* OPT: possibly faster version for use with MSVC */
+#ifdef _MSC_VER
+{ /* Decodes 'nvals' Rice-coded signed values (unary MSBs + 'parameter' binary LSBs each) into vals[]; returns false if a refill fails. */
+	unsigned i;
+	unsigned uval = 0;
+	unsigned bits; /* the # of binary LSBs left to read to finish a rice codeword */
+
+	/* try and get br->consumed_words and br->consumed_bits into register;
+	 * must remember to flush them back to *br before calling other
+	 * bitreader functions that use them, and before returning */
+	register unsigned cwords;
+	register unsigned cbits;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
+	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
+	FLAC__ASSERT(parameter < 32);
+	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
+
+	if(nvals == 0)
+		return true;
+
+	cbits = br->consumed_bits;
+	cwords = br->consumed_words;
+
+	while(1) {
+
+		/* read unary part */
+		while(1) {
+			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
+				brword b = br->buffer[cwords] << cbits;
+				if(b) {
+#if 0 /* slower, probably due to bad register allocation... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32
+					__asm {
+						bsr eax, b
+						not eax
+						and eax, 31
+						mov i, eax
+					}
+#else
+					i = COUNT_ZERO_MSBS(b);
+#endif
+					uval += i;
+					bits = parameter;
+					i++;
+					cbits += i;
+					if(cbits == FLAC__BITS_PER_WORD) {
+						crc16_update_word_(br, br->buffer[cwords]);
+						cwords++;
+						cbits = 0;
+					}
+					goto break1;
+				}
+				else {
+					uval += FLAC__BITS_PER_WORD - cbits;
+					crc16_update_word_(br, br->buffer[cwords]);
+					cwords++;
+					cbits = 0;
+					/* didn't find stop bit yet, have to keep going... */
+				}
+			}
+			/* at this point we've eaten up all the whole words; have to try
+			 * reading through any tail bytes before calling the read callback.
+			 * this is a repeat of the above logic adjusted for the fact we
+			 * don't have a whole word.  note though if the client is feeding
+			 * us data a byte at a time (unlikely), br->consumed_bits may not
+			 * be zero.
+			 */
+			if(br->bytes) {
+				const unsigned end = br->bytes * 8;
+				brword b = (br->buffer[cwords] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << cbits;
+				if(b) {
+					i = COUNT_ZERO_MSBS(b);
+					uval += i;
+					bits = parameter;
+					i++;
+					cbits += i;
+					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
+					goto break1;
+				}
+				else {
+					uval += end - cbits;
+					cbits = end; /* all valid tail bits are consumed now; '+= end' overshot whenever cbits was already nonzero */
+					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
+					/* didn't find stop bit yet, have to keep going... */
+				}
+			}
+			/* flush registers and read; bitreader_read_from_client_() does
+			 * not touch br->consumed_bits at all but we still need to set
+			 * it in case it fails and we have to return false.
+			 */
+			br->consumed_bits = cbits;
+			br->consumed_words = cwords;
+			if(!bitreader_read_from_client_(br))
+				return false;
+			cwords = br->consumed_words;
+		}
+break1:
+		/* read binary part */
+		FLAC__ASSERT(cwords <= br->words);
+
+		if(bits) {
+			while((br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits < bits) {
+				/* flush registers and read; bitreader_read_from_client_() does
+				 * not touch br->consumed_bits at all but we still need to set
+				 * it in case it fails and we have to return false.
+				 */
+				br->consumed_bits = cbits;
+				br->consumed_words = cwords;
+				if(!bitreader_read_from_client_(br))
+					return false;
+				cwords = br->consumed_words;
+			}
+			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
+				if(cbits) {
+					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+					const unsigned n = FLAC__BITS_PER_WORD - cbits;
+					const brword word = br->buffer[cwords];
+					if(bits < n) {
+						uval <<= bits;
+						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-bits);
+						cbits += bits;
+						goto break2;
+					}
+					uval <<= n;
+					uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
+					bits -= n;
+					crc16_update_word_(br, word);
+					cwords++;
+					cbits = 0;
+					if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
+						uval <<= bits;
+						uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits));
+						cbits = bits;
+					}
+					goto break2;
+				}
+				else {
+					FLAC__ASSERT(bits < FLAC__BITS_PER_WORD);
+					uval <<= bits;
+					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
+					cbits = bits;
+					goto break2;
+				}
+			}
+			else {
+				/* in this case we're starting our read at a partial tail word;
+				 * the reader has guaranteed that we have at least 'bits' bits
+				 * available to read, which makes this case simpler.
+				 */
+				uval <<= bits;
+				if(cbits) {
+					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+					FLAC__ASSERT(cbits + bits <= br->bytes*8);
+					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-bits);
+					cbits += bits;
+					goto break2;
+				}
+				else {
+					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
+					cbits += bits;
+					goto break2;
+				}
+			}
+		}
+break2:
+		/* compose the value */
+		*vals = (int)(uval >> 1 ^ -(int)(uval & 1)); /* branch-free sign unfold: even uval -> uval/2, odd uval -> -(uval/2)-1 */
+
+		/* are we done? */
+		--nvals;
+		if(nvals == 0) {
+			br->consumed_bits = cbits;
+			br->consumed_words = cwords;
+			return true;
+		}
+
+		uval = 0;
+		++vals;
+
+	}
+}
+#else
+{ /* Decodes 'nvals' Rice-coded signed values (unary MSBs + 'parameter' binary LSBs each) into vals[]; returns false if a refill fails. */
+	unsigned i;
+	unsigned uval = 0;
+
+	/* try and get br->consumed_words and br->consumed_bits into register;
+	 * must remember to flush them back to *br before calling other
+	 * bitreader functions that use them, and before returning */
+	register unsigned cwords;
+	register unsigned cbits;
+	unsigned ucbits; /* keep track of the number of unconsumed bits in the buffer */
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
+	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
+	FLAC__ASSERT(parameter < 32);
+	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
+
+	if(nvals == 0)
+		return true;
+
+	cbits = br->consumed_bits;
+	cwords = br->consumed_words;
+	ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+
+	while(1) {
+
+		/* read unary part */
+		while(1) {
+			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
+				brword b = br->buffer[cwords] << cbits;
+				if(b) {
+#if 0 /* is not discernably faster... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32 && defined __GNUC__
+					asm volatile (
+						"bsrl %1, %0;"
+						"notl %0;"
+						"andl $31, %0;"
+						: "=r"(i)
+						: "r"(b)
+					);
+#else
+					i = COUNT_ZERO_MSBS(b);
+#endif
+					uval += i;
+					cbits += i;
+					cbits++; /* skip over stop bit */
+					if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
+						crc16_update_word_(br, br->buffer[cwords]);
+						cwords++;
+						cbits = 0;
+					}
+					goto break1;
+				}
+				else {
+					uval += FLAC__BITS_PER_WORD - cbits;
+					crc16_update_word_(br, br->buffer[cwords]);
+					cwords++;
+					cbits = 0;
+					/* didn't find stop bit yet, have to keep going... */
+				}
+			}
+			/* at this point we've eaten up all the whole words; have to try
+			 * reading through any tail bytes before calling the read callback.
+			 * this is a repeat of the above logic adjusted for the fact we
+			 * don't have a whole word.  note though if the client is feeding
+			 * us data a byte at a time (unlikely), br->consumed_bits may not
+			 * be zero.
+			 */
+			if(br->bytes) {
+				const unsigned end = br->bytes * 8;
+				brword b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
+				if(b) {
+					i = COUNT_ZERO_MSBS(b);
+					uval += i;
+					cbits += i;
+					cbits++; /* skip over stop bit */
+					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
+					goto break1;
+				}
+				else {
+					uval += end - cbits;
+					cbits = end; /* all valid tail bits are consumed now; '+= end' overshot whenever cbits was already nonzero */
+					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
+					/* didn't find stop bit yet, have to keep going... */
+				}
+			}
+			/* flush registers and read; bitreader_read_from_client_() does
+			 * not touch br->consumed_bits at all but we still need to set
+			 * it in case it fails and we have to return false.
+			 */
+			br->consumed_bits = cbits;
+			br->consumed_words = cwords;
+			if(!bitreader_read_from_client_(br))
+				return false;
+			cwords = br->consumed_words;
+			ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval;
+			/* + uval to offset our count by the # of unary bits already
+			 * consumed before the read, because we will add these back
+			 * in all at once at break1
+			 */
+		}
+break1:
+		ucbits -= uval;
+		ucbits--; /* account for stop bit */
+
+		/* read binary part */
+		FLAC__ASSERT(cwords <= br->words);
+
+		if(parameter) {
+			while(ucbits < parameter) {
+				/* flush registers and read; bitreader_read_from_client_() does
+				 * not touch br->consumed_bits at all but we still need to set
+				 * it in case it fails and we have to return false.
+				 */
+				br->consumed_bits = cbits;
+				br->consumed_words = cwords;
+				if(!bitreader_read_from_client_(br))
+					return false;
+				cwords = br->consumed_words;
+				ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+			}
+			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
+				if(cbits) {
+					/* this also works when consumed_bits==0, it's just slower than necessary for that case */
+					const unsigned n = FLAC__BITS_PER_WORD - cbits;
+					const brword word = br->buffer[cwords];
+					if(parameter < n) {
+						uval <<= parameter;
+						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
+						cbits += parameter;
+					}
+					else {
+						uval <<= n;
+						uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
+						crc16_update_word_(br, word);
+						cwords++;
+						cbits = parameter - n;
+						if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
+							uval <<= cbits;
+							uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
+						}
+					}
+				}
+				else {
+					cbits = parameter;
+					uval <<= parameter;
+					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
+				}
+			}
+			else {
+				/* in this case we're starting our read at a partial tail word;
+				 * the reader has guaranteed that we have at least 'parameter'
+				 * bits available to read, which makes this case simpler.
+				 */
+				uval <<= parameter;
+				if(cbits) {
+					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+					FLAC__ASSERT(cbits + parameter <= br->bytes*8);
+					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
+					cbits += parameter;
+				}
+				else {
+					cbits = parameter;
+					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
+				}
+			}
+		}
+
+		ucbits -= parameter;
+
+		/* compose the value */
+		*vals = (int)(uval >> 1 ^ -(int)(uval & 1)); /* branch-free sign unfold: even uval -> uval/2, odd uval -> -(uval/2)-1 */
+
+		/* are we done? */
+		--nvals;
+		if(nvals == 0) {
+			br->consumed_bits = cbits;
+			br->consumed_words = cwords;
+			return true;
+		}
+
+		uval = 0;
+		++vals;
+
+	}
+}
+#endif
+
+#if 0 /* UNUSED */
+FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter)
+{ /* Reads one Golomb-coded signed value with divisor 'parameter' into *val; returns false if any read fails. (Compiled out by the surrounding #if 0.) */
+	FLAC__uint32 lsbs = 0, msbs = 0;
+	unsigned bit, uval, k;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	k = FLAC__bitmath_ilog2(parameter); /* presumably floor(log2(parameter)) -- confirm against FLAC__bitmath_ilog2() */
+
+	/* read the unary MSBs and end bit */
+	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
+		return false;
+
+	/* read the binary LSBs */
+	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, k))
+		return false;
+
+	if(parameter == 1u<<k) { /* parameter is an exact power of two: same composition as the Rice reader */
+		/* compose the value */
+		uval = (msbs << k) | lsbs;
+	}
+	else {
+		unsigned d = (1 << (k+1)) - parameter; /* threshold: k-bit remainders at or above d need one more bit */
+		if(lsbs >= d) {
+			if(!FLAC__bitreader_read_bit(br, &bit))
+				return false;
+			lsbs <<= 1;
+			lsbs |= bit;
+			lsbs -= d;
+		}
+		/* compose the value */
+		uval = msbs * parameter + lsbs;
+	}
+
+	/* unfold unsigned to signed */
+	if(uval & 1) /* odd codes map to negatives: 1 -> -1, 3 -> -2, ... */
+		*val = -((int)(uval >> 1)) - 1;
+	else /* even codes map to non-negatives: 0 -> 0, 2 -> 1, ... */
+		*val = (int)(uval >> 1);
+
+	return true;
+}
+
+FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter)
+{ /* Reads one Golomb-coded unsigned value with divisor 'parameter' into *val; returns false if any read fails. (Compiled out by the surrounding #if 0.) */
+	FLAC__uint32 lsbs, msbs = 0;
+	unsigned bit, k;
+
+	FLAC__ASSERT(0 != br);
+	FLAC__ASSERT(0 != br->buffer);
+
+	k = FLAC__bitmath_ilog2(parameter); /* presumably floor(log2(parameter)) -- confirm against FLAC__bitmath_ilog2() */
+
+	/* read the unary MSBs and end bit */
+	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
+		return false;
+
+	/* read the binary LSBs */
+	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, k))
+		return false;
+
+	if(parameter == 1u<<k) { /* parameter is an exact power of two: same composition as the Rice reader */
+		/* compose the value */
+		*val = (msbs << k) | lsbs;
+	}
+	else {
+		unsigned d = (1 << (k+1)) - parameter; /* threshold: k-bit remainders at or above d need one more bit */
+		if(lsbs >= d) {
+			if(!FLAC__bitreader_read_bit(br, &bit))
+				return false;
+			lsbs <<= 1;
+			lsbs |= bit;
+			lsbs -= d;
+		}
+		/* compose the value */
+		*val = msbs * parameter + lsbs;
+	}
+
+	return true;
+}
+#endif /* UNUSED */
+
+/* on return, if *val == 0xffffffff then the utf-8 sequence was invalid, but the return value will be true */
+FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen)
+{ /* Reads a UTF-8-style variable-length number (1 to 6 bytes) into *val; when 'raw' is non-NULL every byte read is also stored at raw[*rawlen] and *rawlen is advanced. Returns false only if a read fails. */
+	FLAC__uint32 v = 0; /* value accumulated so far */
+	FLAC__uint32 x; /* byte most recently read */
+	unsigned i; /* number of continuation bytes still expected */
+
+	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+		return false;
+	if(raw)
+		raw[(*rawlen)++] = (FLAC__byte)x;
+	if(!(x & 0x80)) { /* 0xxxxxxx */
+		v = x;
+		i = 0;
+	}
+	else if((x & 0xE0) == 0xC0) { /* 110xxxxx -- exact prefix match, so a stray 10xxxxxx byte is no longer taken as a leader */
+		v = x & 0x1F;
+		i = 1;
+	}
+	else if((x & 0xF0) == 0xE0) { /* 1110xxxx */
+		v = x & 0x0F;
+		i = 2;
+	}
+	else if((x & 0xF8) == 0xF0) { /* 11110xxx */
+		v = x & 0x07;
+		i = 3;
+	}
+	else if((x & 0xFC) == 0xF8) { /* 111110xx */
+		v = x & 0x03;
+		i = 4;
+	}
+	else if((x & 0xFE) == 0xFC) { /* 1111110x */
+		v = x & 0x01;
+		i = 5;
+	}
+	else { /* 10xxxxxx, 11111110 or 11111111: not a valid leading byte */
+		*val = 0xffffffff;
+		return true;
+	}
+	for( ; i; i--) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+			return false;
+		if(raw)
+			raw[(*rawlen)++] = (FLAC__byte)x;
+		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
+			*val = 0xffffffff;
+			return true;
+		}
+		v <<= 6; /* each continuation byte contributes 6 payload bits */
+		v |= (x & 0x3F);
+	}
+	*val = v;
+	return true;
+}
+
+/* on return, if *val == 0xffffffffffffffff then the utf-8 sequence was invalid, but the return value will be true */
+FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen)
+{ /* Reads a UTF-8-style variable-length number (1 to 7 bytes) into *val; when 'raw' is non-NULL every byte read is also stored at raw[*rawlen] and *rawlen is advanced. Returns false only if a read fails. */
+	FLAC__uint64 v = 0; /* value accumulated so far */
+	FLAC__uint32 x; /* byte most recently read */
+	unsigned i; /* number of continuation bytes still expected */
+
+	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+		return false;
+	if(raw)
+		raw[(*rawlen)++] = (FLAC__byte)x;
+	if(!(x & 0x80)) { /* 0xxxxxxx */
+		v = x;
+		i = 0;
+	}
+	else if((x & 0xE0) == 0xC0) { /* 110xxxxx -- exact prefix match, so a stray 10xxxxxx byte is no longer taken as a leader */
+		v = x & 0x1F;
+		i = 1;
+	}
+	else if((x & 0xF0) == 0xE0) { /* 1110xxxx */
+		v = x & 0x0F;
+		i = 2;
+	}
+	else if((x & 0xF8) == 0xF0) { /* 11110xxx */
+		v = x & 0x07;
+		i = 3;
+	}
+	else if((x & 0xFC) == 0xF8) { /* 111110xx */
+		v = x & 0x03;
+		i = 4;
+	}
+	else if((x & 0xFE) == 0xFC) { /* 1111110x */
+		v = x & 0x01;
+		i = 5;
+	}
+	else if((x & 0xFF) == 0xFE) { /* 11111110 */
+		v = 0;
+		i = 6;
+	}
+	else { /* 10xxxxxx or 11111111: not a valid leading byte */
+		*val = FLAC__U64L(0xffffffffffffffff);
+		return true;
+	}
+	for( ; i; i--) {
+		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
+			return false;
+		if(raw)
+			raw[(*rawlen)++] = (FLAC__byte)x;
+		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
+			*val = FLAC__U64L(0xffffffffffffffff);
+			return true;
+		}
+		v <<= 6; /* each continuation byte contributes 6 payload bits */
+		v |= (x & 0x3F);
+	}
+	*val = v;
+	return true;
+}
diff --git a/FLAC/cpu.c b/FLAC/cpu.c
index 225bf4fade..9cc759b9e5 100644
--- a/FLAC/cpu.c
+++ b/FLAC/cpu.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,15 +29,17 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include "private/cpu.h"
 #include <stdlib.h>
 #include <stdio.h>
 
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if defined FLAC__CPU_PPC
+#if defined FLAC__CPU_IA32
+# include <signal.h>
+#elif defined FLAC__CPU_PPC
 # if !defined FLAC__NO_ASM
 #  if defined FLAC__SYS_DARWIN
 #   include <sys/sysctl.h>
@@ -50,11 +52,6 @@
 #   endif
 #  else /* FLAC__SYS_DARWIN */
 
-#   ifdef __FreeBSD__
-#    include <sys/types.h>
-#    include <sys/sysctl.h>
-#   endif
-
 #   include <signal.h>
 #   include <setjmp.h>
 
@@ -74,68 +71,291 @@ static void sigill_handler (int sig)
 # endif /* FLAC__NO_ASM */
 #endif /* FLAC__CPU_PPC */
 
-const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
+#if defined (__NetBSD__) || defined(__OpenBSD__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#include <machine/cpu.h>
+#endif
 
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
-const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
+#if defined(__APPLE__)
+/* how to get sysctlbyname()? */
+#endif
+
+/* these are flags in EDX of CPUID AX=00000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
+/* these are flags in ECX of CPUID AX=00000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
+/* these are flags in EDX of CPUID AX=80000001 */
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
+static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
+
+
+/*
+ * Extra stuff needed for detection of OS support for SSE on IA-32
+ */
+#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && defined FLAC__HAS_NASM && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
+# if defined(__linux__)
+/*
+ * If the OS doesn't support SSE, we will get here with a SIGILL.  We
+ * modify the return address to jump over the offending SSE instruction
+ * and also the operation following it that indicates the instruction
+ * executed successfully.  In this way we use no global variables and
+ * stay thread-safe.
+ *
+ * 3 + 3 + 6:
+ *   3 bytes for "xorps xmm0,xmm0"
+ *   3 bytes for estimate of how long the following "inc var" instruction is
+ *   6 bytes extra in case our estimate is wrong
+ * 12 bytes puts us in the NOP "landing zone"
+ */
+#  undef USE_OBSOLETE_SIGCONTEXT_FLAVOR /* #define this to use the older signal handler method */
+#  ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
+	static void sigill_handler_sse_os(int signal, struct sigcontext sc)
+	{ /* SIGILL handler, obsolete sigcontext flavor: advances the eip field of 'sc' past the SSE probe */
+		(void)signal; /* unused */
+		sc.eip += 3 + 3 + 6; /* 12 bytes total, per the "3 + 3 + 6" breakdown in the comment above */
+	}
+#  else
+#   include <sys/ucontext.h>
+	static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc)
+	{ /* SIGILL handler, sa_sigaction flavor: advances the saved instruction pointer in the ucontext past the SSE probe */
+		(void)signal, (void)si; /* unused */
+		((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6; /* 12 bytes total, per the "3 + 3 + 6" breakdown in the comment above */
+	}
+#  endif
+# elif defined(_MSC_VER)
+#  include <windows.h>
+#  undef USE_TRY_CATCH_FLAVOR /* #define this to use the try/catch method for catching illegal opcode exception */
+#  ifdef USE_TRY_CATCH_FLAVOR
+#  else
+	LONG CALLBACK sigill_handler_sse_os(EXCEPTION_POINTERS *ep)
+	{ /* exception filter for the SSE OS-support probe: steps over the probe on an illegal-instruction fault */
+		if(ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
+			ep->ContextRecord->Eip += 3 + 3 + 6; /* same 12-byte skip as the Linux handlers above */
+			return EXCEPTION_CONTINUE_EXECUTION;
+		}
+		return EXCEPTION_CONTINUE_SEARCH; /* any other exception code is passed on untouched */
+	}
+#  endif
+# endif
+#endif
 
 
 void FLAC__cpu_info(FLAC__CPUInfo *info)
 {
+/*
+ * IA32-specific
+ */
 #ifdef FLAC__CPU_IA32
 	info->type = FLAC__CPUINFO_TYPE_IA32;
 #if !defined FLAC__NO_ASM && defined FLAC__HAS_NASM
-	info->use_asm = true;
-	{
-		unsigned cpuid = FLAC__cpu_info_asm_ia32();
-		info->data.ia32.cmov = (cpuid & FLAC__CPUINFO_IA32_CPUID_CMOV)? true : false;
-		info->data.ia32.mmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_MMX)? true : false;
-		info->data.ia32.fxsr = (cpuid & FLAC__CPUINFO_IA32_CPUID_FXSR)? true : false;
-		info->data.ia32.sse = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE)? true : false;
-		info->data.ia32.sse2 = (cpuid & FLAC__CPUINFO_IA32_CPUID_SSE2)? true : false;
-
-#ifndef FLAC__SSE_OS
-		info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
-#elif defined(__FreeBSD__)
-		/* on FreeBSD we can double-check via sysctl whether the OS supports SSE */
-		{
-			int sse;
-			size_t len = sizeof(sse);
-			if (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) || !sse)
-				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = false;
-		}
-#endif
+	info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
+	info->data.ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false;
+	info->data.ia32.bswap = info->data.ia32.cpuid; /* CPUID => BSWAP since it came after */
+	info->data.ia32.cmov = false;
+	info->data.ia32.mmx = false;
+	info->data.ia32.fxsr = false;
+	info->data.ia32.sse = false;
+	info->data.ia32.sse2 = false;
+	info->data.ia32.sse3 = false;
+	info->data.ia32.ssse3 = false;
+	info->data.ia32._3dnow = false;
+	info->data.ia32.ext3dnow = false;
+	info->data.ia32.extmmx = false;
+	if(info->data.ia32.cpuid) {
+		/* http://www.sandpile.org/ia32/cpuid.htm */
+		FLAC__uint32 flags_edx, flags_ecx;
+		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
+		info->data.ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
+		info->data.ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
+		info->data.ia32.fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
+		info->data.ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
+		info->data.ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
+		info->data.ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
+		info->data.ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
 
 #ifdef FLAC__USE_3DNOW
-		cpuid = FLAC__cpu_info_extended_amd_asm_ia32();
-		info->data.ia32._3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW)? true : false;
-		info->data.ia32.ext3dnow = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
-		info->data.ia32.extmmx = (cpuid & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX)? true : false;
+		flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
+		info->data.ia32._3dnow   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW   )? true : false;
+		info->data.ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
+		info->data.ia32.extmmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX  )? true : false;
 #else
 		info->data.ia32._3dnow = info->data.ia32.ext3dnow = info->data.ia32.extmmx = false;
 #endif
+
+#ifdef DEBUG
+		fprintf(stderr, "CPU info (IA-32):\n");
+		fprintf(stderr, "  CPUID ...... %c\n", info->data.ia32.cpuid   ? 'Y' : 'n');
+		fprintf(stderr, "  BSWAP ...... %c\n", info->data.ia32.bswap   ? 'Y' : 'n');
+		fprintf(stderr, "  CMOV ....... %c\n", info->data.ia32.cmov    ? 'Y' : 'n');
+		fprintf(stderr, "  MMX ........ %c\n", info->data.ia32.mmx     ? 'Y' : 'n');
+		fprintf(stderr, "  FXSR ....... %c\n", info->data.ia32.fxsr    ? 'Y' : 'n');
+		fprintf(stderr, "  SSE ........ %c\n", info->data.ia32.sse     ? 'Y' : 'n');
+		fprintf(stderr, "  SSE2 ....... %c\n", info->data.ia32.sse2    ? 'Y' : 'n');
+		fprintf(stderr, "  SSE3 ....... %c\n", info->data.ia32.sse3    ? 'Y' : 'n');
+		fprintf(stderr, "  SSSE3 ...... %c\n", info->data.ia32.ssse3   ? 'Y' : 'n');
+		fprintf(stderr, "  3DNow! ..... %c\n", info->data.ia32._3dnow  ? 'Y' : 'n');
+		fprintf(stderr, "  3DNow!-ext . %c\n", info->data.ia32.ext3dnow? 'Y' : 'n');
+		fprintf(stderr, "  3DNow!-MMX . %c\n", info->data.ia32.extmmx  ? 'Y' : 'n');
+#endif
+
+		/*
+		 * now have to check for OS support of SSE/SSE2
+		 */
+		if(info->data.ia32.fxsr || info->data.ia32.sse || info->data.ia32.sse2) {
+#if defined FLAC__NO_SSE_OS
+			/* assume user knows better than us; turn it off */
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+#elif defined FLAC__SSE_OS
+			/* assume user knows better than us; leave as detected above */
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
+			int sse = 0;
+			size_t len;
+			/* at least one of these must work: */
+			len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
+			len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
+			if(!sse)
+				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+#elif defined(__NetBSD__) || defined (__OpenBSD__)
+# if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
+			int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
+			size_t len = sizeof(val);
+			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
+				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			else { /* double-check SSE2 */
+				mib[1] = CPU_SSE2;
+				len = sizeof(val);
+				if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
+					info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			}
+# else
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+# endif
+#elif defined(__linux__)
+			int sse = 0;
+			struct sigaction sigill_save;
+#ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
+			if(0 == sigaction(SIGILL, NULL, &sigill_save) && signal(SIGILL, (void (*)(int))sigill_handler_sse_os) != SIG_ERR)
+#else
+			struct sigaction sigill_sse;
+			sigill_sse.sa_sigaction = sigill_handler_sse_os;
+			__sigemptyset(&sigill_sse.sa_mask);
+			sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
+			if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
+#endif
+			{
+				/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
+				/* see sigill_handler_sse_os() for an explanation of the following: */
+				asm volatile (
+					"xorl %0,%0\n\t"          /* for some reason, still need to do this to clear 'sse' var */
+					"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
+					"incl %0\n\t"             /* SIGILL handler will jump over this */
+					/* landing zone */
+					"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
+					"nop\n\t"
+					"nop\n\t"
+					"nop\n\t"
+					"nop\n\t"
+					"nop\n\t"
+					"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
+					"nop\n\t"
+					"nop"     /* SIGILL jump lands here if "inc" is 1 byte */
+					: "=r"(sse)
+					: "r"(sse)
+				);
+
+				sigaction(SIGILL, &sigill_save, NULL);
+			}
+
+			if(!sse)
+				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+#elif defined(_MSC_VER)
+# ifdef USE_TRY_CATCH_FLAVOR
+			_try {
+				__asm {
+#  if _MSC_VER <= 1200
+					/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
+					_emit 0x0F
+					_emit 0x57
+					_emit 0xC0
+#  else
+					xorps xmm0,xmm0
+#  endif
+				}
+			}
+			_except(EXCEPTION_EXECUTE_HANDLER) {
+				if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
+					info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+			}
+# else
+			int sse = 0;
+			LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os);
+			/* see GCC version above for explanation */
+			/*  http://msdn2.microsoft.com/en-us/library/4ks26t93.aspx */
+			/*  http://www.codeproject.com/cpp/gccasm.asp */
+			/*  http://www.hick.org/~mmiller/msvc_inline_asm.html */
+			__asm {
+#  if _MSC_VER <= 1200
+				/* VC6 assembler doesn't know SSE, have to emit bytecode instead */
+				_emit 0x0F
+				_emit 0x57
+				_emit 0xC0
+#  else
+				xorps xmm0,xmm0
+#  endif
+				inc sse
+				nop
+				nop
+				nop
+				nop
+				nop
+				nop
+				nop
+				nop
+				nop
+			}
+			SetUnhandledExceptionFilter(save);
+			if(!sse)
+				info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+# endif
+#else
+			/* no way to test, disable to be safe */
+			info->data.ia32.fxsr = info->data.ia32.sse = info->data.ia32.sse2 = info->data.ia32.sse3 = info->data.ia32.ssse3 = false;
+#endif
+#ifdef DEBUG
+		fprintf(stderr, "  SSE OS sup . %c\n", info->data.ia32.sse     ? 'Y' : 'n');
+#endif
+
+		}
 	}
 #else
 	info->use_asm = false;
 #endif
+
+/*
+ * PPC-specific
+ */
 #elif defined FLAC__CPU_PPC
 	info->type = FLAC__CPUINFO_TYPE_PPC;
-#if !defined FLAC__NO_ASM
+# if !defined FLAC__NO_ASM
 	info->use_asm = true;
-#ifdef FLAC__USE_ALTIVEC
-#if defined FLAC__SYS_DARWIN
+#  ifdef FLAC__USE_ALTIVEC
+#   if defined FLAC__SYS_DARWIN
 	{
-		int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-		int result = 0;
-		size_t length = sizeof(result);
-		int error = sysctl(selectors, 2, &result, &length, 0, 0);
-
-		info->data.ppc.altivec = error==0 ? result!=0 : 0;
+		int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT };
+		size_t len = sizeof(val);
+		info->data.ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val);
 	}
 	{
 		host_basic_info_data_t hostInfo;
@@ -146,14 +366,15 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 
 		info->data.ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970);
 	}
-#else /* FLAC__SYS_DARWIN */
+#   else /* FLAC__USE_ALTIVEC && !FLAC__SYS_DARWIN */
 	{
 		/* no Darwin, do it the brute-force way */
-		/* this is borrowed from MPlayer from the libmpeg2 library */
+		/* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */
 		info->data.ppc.altivec = 0;
 		info->data.ppc.ppc64 = 0;
 
 		signal (SIGILL, sigill_handler);
+		canjump = 0;
 		if (!sigsetjmp (jmpbuf, 1)) {
 			canjump = 1;
 
@@ -176,16 +397,20 @@ void FLAC__cpu_info(FLAC__CPUInfo *info)
 
 			info->data.ppc.ppc64 = 1;
 		}
-		signal (SIGILL, SIG_DFL);
+		signal (SIGILL, SIG_DFL); /*@@@@@@ should save and restore old signal */
 	}
-#endif /* FLAC__SYS_DARWIN */
-#else /* FLAC__USE_ALTIVEC */
+#   endif
+#  else /* !FLAC__USE_ALTIVEC */
 	info->data.ppc.altivec = 0;
 	info->data.ppc.ppc64 = 0;
-#endif /* FLAC__USE_ALTIVEC */
-#else /* FLAC__NO_ASM */
+#  endif
+# else
 	info->use_asm = false;
-#endif /* FLAC__NO_ASM */
+# endif
+
+/*
+ * unknown CPU
+ */
 #else
 	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
 	info->use_asm = false;
diff --git a/FLAC/crc.c b/FLAC/crc.c
index 49274eab87..88fbc2e74f 100644
--- a/FLAC/crc.c
+++ b/FLAC/crc.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,6 +29,10 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include "private/crc.h"
 
 /* CRC-8, poly = x^8 + x^2 + x^1 + x^0, init = 0 */
@@ -70,7 +74,7 @@ FLAC__byte const FLAC__crc8_table[256] = {
 
 /* CRC-16, poly = x^16 + x^15 + x^2 + x^0, init = 0 */
 
-FLAC__uint16 FLAC__crc16_table[256] = {
+unsigned FLAC__crc16_table[256] = {
 	0x0000,  0x8005,  0x800f,  0x000a,  0x801b,  0x001e,  0x0014,  0x8011,
 	0x8033,  0x0036,  0x003c,  0x8039,  0x0028,  0x802d,  0x8027,  0x0022,
 	0x8063,  0x0066,  0x006c,  0x8069,  0x0078,  0x807d,  0x8077,  0x0072,
@@ -127,23 +131,12 @@ FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len)
 	return crc;
 }
 
-void FLAC__crc16_update(const FLAC__byte data, FLAC__uint16 *crc)
+unsigned FLAC__crc16(const FLAC__byte *data, unsigned len)
 {
-	*crc = (*crc<<8) ^ FLAC__crc16_table[(*crc>>8) ^ data];
-}
-
-void FLAC__crc16_update_block(const FLAC__byte *data, unsigned len, FLAC__uint16 *crc)
-{
-	while(len--)
-		*crc = (*crc<<8) ^ FLAC__crc16_table[(*crc>>8) ^ *data++];
-}
-
-FLAC__uint16 FLAC__crc16(const FLAC__byte *data, unsigned len)
-{
-	FLAC__uint16 crc = 0;
+	unsigned crc = 0;
 
 	while(len--)
-		crc = (crc<<8) ^ FLAC__crc16_table[(crc>>8) ^ *data++];
+		crc = ((crc<<8) ^ FLAC__crc16_table[(crc>>8) ^ *data++]) & 0xffff;
 
 	return crc;
 }
diff --git a/FLAC/fixed.c b/FLAC/fixed.c
index 6824e93716..2f9dee3691 100644
--- a/FLAC/fixed.c
+++ b/FLAC/fixed.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,7 +29,12 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include <math.h>
+#include <string.h>
 #include "private/bitmath.h"
 #include "private/fixed.h"
 #include "FLAC/assert.h"
@@ -351,32 +356,36 @@ void FLAC__fixed_compute_residual(const FLAC__int32 data[], unsigned data_len, u
 
 	switch(order) {
 		case 0:
-			for(i = 0; i < idata_len; i++) {
-				residual[i] = data[i];
-			}
+			FLAC__ASSERT(sizeof(residual[0]) == sizeof(data[0]));
+			memcpy(residual, data, sizeof(residual[0])*data_len);
 			break;
 		case 1:
-			for(i = 0; i < idata_len; i++) {
+			for(i = 0; i < idata_len; i++)
 				residual[i] = data[i] - data[i-1];
-			}
 			break;
 		case 2:
-			for(i = 0; i < idata_len; i++) {
-				/* == data[i] - 2*data[i-1] + data[i-2] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				residual[i] = data[i] - (data[i-1] << 1) + data[i-2];
-			}
+#else
+				residual[i] = data[i] - 2*data[i-1] + data[i-2];
+#endif
 			break;
 		case 3:
-			for(i = 0; i < idata_len; i++) {
-				/* == data[i] - 3*data[i-1] + 3*data[i-2] - data[i-3] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				residual[i] = data[i] - (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) - data[i-3];
-			}
+#else
+				residual[i] = data[i] - 3*data[i-1] + 3*data[i-2] - data[i-3];
+#endif
 			break;
 		case 4:
-			for(i = 0; i < idata_len; i++) {
-				/* == data[i] - 4*data[i-1] + 6*data[i-2] - 4*data[i-3] + data[i-4] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				residual[i] = data[i] - ((data[i-1]+data[i-3])<<2) + ((data[i-2]<<2) + (data[i-2]<<1)) + data[i-4];
-			}
+#else
+				residual[i] = data[i] - 4*data[i-1] + 6*data[i-2] - 4*data[i-3] + data[i-4];
+#endif
 			break;
 		default:
 			FLAC__ASSERT(0);
@@ -389,32 +398,36 @@ void FLAC__fixed_restore_signal(const FLAC__int32 residual[], unsigned data_len,
 
 	switch(order) {
 		case 0:
-			for(i = 0; i < idata_len; i++) {
-				data[i] = residual[i];
-			}
+			FLAC__ASSERT(sizeof(residual[0]) == sizeof(data[0]));
+			memcpy(data, residual, sizeof(residual[0])*data_len);
 			break;
 		case 1:
-			for(i = 0; i < idata_len; i++) {
+			for(i = 0; i < idata_len; i++)
 				data[i] = residual[i] + data[i-1];
-			}
 			break;
 		case 2:
-			for(i = 0; i < idata_len; i++) {
-				/* == residual[i] + 2*data[i-1] - data[i-2] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				data[i] = residual[i] + (data[i-1]<<1) - data[i-2];
-			}
+#else
+				data[i] = residual[i] + 2*data[i-1] - data[i-2];
+#endif
 			break;
 		case 3:
-			for(i = 0; i < idata_len; i++) {
-				/* residual[i] + 3*data[i-1] - 3*data[i-2]) + data[i-3] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				data[i] = residual[i] + (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) + data[i-3];
-			}
+#else
+				data[i] = residual[i] + 3*data[i-1] - 3*data[i-2] + data[i-3];
+#endif
 			break;
 		case 4:
-			for(i = 0; i < idata_len; i++) {
-				/* == residual[i] + 4*data[i-1] - 6*data[i-2] + 4*data[i-3] - data[i-4] */
+			for(i = 0; i < idata_len; i++)
+#if 1 /* OPT: may be faster with some compilers on some systems */
 				data[i] = residual[i] + ((data[i-1]+data[i-3])<<2) - ((data[i-2]<<2) + (data[i-2]<<1)) - data[i-4];
-			}
+#else
+				data[i] = residual[i] + 4*data[i-1] - 6*data[i-2] + 4*data[i-3] - data[i-4];
+#endif
 			break;
 		default:
 			FLAC__ASSERT(0);
diff --git a/FLAC/format.c b/FLAC/format.c
index af41d1e441..0fdbb61e85 100644
--- a/FLAC/format.c
+++ b/FLAC/format.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,14 +29,19 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include <stdio.h>
 #include <stdlib.h> /* for qsort() */
+#include <string.h> /* for memset() */
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
 #include "private/format.h"
 
-#ifdef HAVE_CONFIG_H
-#include <config.h>
+#ifndef FLaC__INLINE
+#define FLaC__INLINE
 #endif
 
 #ifdef min
@@ -52,13 +57,13 @@
 #endif
 
 /* VERSION should come from configure */
-FLAC_API const char *FLAC__VERSION_STRING = "1.1.2";
+FLAC_API const char *FLAC__VERSION_STRING = "1.2.1";
 
-#if defined _MSC_VER || defined __MINW32__
+#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
 /* yet one more hack because of MSVC6: */
-FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC 1.1.2 20050205";
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC 1.2.1 20070917"; /* keep the version in sync with FLAC__VERSION_STRING */
 #else
-FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20050205";
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20070917";
 #endif
 
 FLAC_API const FLAC__byte FLAC__STREAM_SYNC_STRING[4] = { 'f','L','a','C' };
@@ -104,13 +109,23 @@ FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN = 1; /* bit */
 FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN = 7+258*8; /* bits */
 FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN = 8; /* bits */
 
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_TYPE_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_COLORS_LEN = 32; /* bits */
+FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN = 32; /* bits */
+
 FLAC_API const unsigned FLAC__STREAM_METADATA_IS_LAST_LEN = 1; /* bits */
 FLAC_API const unsigned FLAC__STREAM_METADATA_TYPE_LEN = 7; /* bits */
 FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN = 24; /* bits */
 
 FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC = 0x3ffe;
 FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC_LEN = 14; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN = 2; /* bits */
+FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN = 1; /* bits */
+FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN = 1; /* bits */
 FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCK_SIZE_LEN = 4; /* bits */
 FLAC_API const unsigned FLAC__FRAME_HEADER_SAMPLE_RATE_LEN = 4; /* bits */
 FLAC_API const unsigned FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN = 4; /* bits */
@@ -123,12 +138,15 @@ FLAC_API const unsigned FLAC__FRAME_FOOTER_CRC_LEN = 16; /* bits */
 FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_TYPE_LEN = 2; /* bits */
 FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN = 4; /* bits */
 FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN = 4; /* bits */
+FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN = 5; /* bits */
 FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN = 5; /* bits */
 
 FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER = 15; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN)-1 */
+FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER = 31; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN)-1 */
 
 FLAC_API const char * const FLAC__EntropyCodingMethodTypeString[] = {
-	"PARTITIONED_RICE"
+	"PARTITIONED_RICE",
+	"PARTITIONED_RICE2"
 };
 
 FLAC_API const unsigned FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN = 4; /* bits */
@@ -168,14 +186,47 @@ FLAC_API const char * const FLAC__MetadataTypeString[] = {
 	"APPLICATION",
 	"SEEKTABLE",
 	"VORBIS_COMMENT",
-	"CUESHEET"
+	"CUESHEET",
+	"PICTURE"
+};
+
+FLAC_API const char * const FLAC__StreamMetadata_Picture_TypeString[] = {
+	"Other",
+	"32x32 pixels 'file icon' (PNG only)",
+	"Other file icon",
+	"Cover (front)",
+	"Cover (back)",
+	"Leaflet page",
+	"Media (e.g. label side of CD)",
+	"Lead artist/lead performer/soloist",
+	"Artist/performer",
+	"Conductor",
+	"Band/Orchestra",
+	"Composer",
+	"Lyricist/text writer",
+	"Recording Location",
+	"During recording",
+	"During performance",
+	"Movie/video screen capture",
+	"A bright coloured fish",
+	"Illustration",
+	"Band/artist logotype",
+	"Publisher/Studio logotype"
 };
 
 FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate)
+{
+	if(sample_rate == 0 || sample_rate > FLAC__MAX_SAMPLE_RATE) {
+		return false;
+	}
+	else
+		return true;
+}
+
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(unsigned sample_rate)
 {
 	if(
-		sample_rate == 0 ||
-		sample_rate > FLAC__MAX_SAMPLE_RATE ||
+		!FLAC__format_sample_rate_is_valid(sample_rate) ||
 		(
 			sample_rate >= (1u << 16) &&
 			!(sample_rate % 1000 == 0 || sample_rate % 10 == 0)
@@ -187,6 +238,7 @@ FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate)
 		return true;
 }
 
+/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 FLAC_API FLAC__bool FLAC__format_seektable_is_legal(const FLAC__StreamMetadata_SeekTable *seek_table)
 {
 	unsigned i;
@@ -222,6 +274,7 @@ static int seekpoint_compare_(const FLAC__StreamMetadata_SeekPoint *l, const FLA
 		return 1;
 }
 
+/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *seek_table)
 {
 	unsigned i, j;
@@ -260,7 +313,7 @@ FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *se
  * and a more clear explanation at the end of this section:
  *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  */
-static __inline unsigned utf8len_(const FLAC__byte *utf8)
+static FLaC__INLINE unsigned utf8len_(const FLAC__byte *utf8)
 {
 	FLAC__ASSERT(0 != utf8);
 	if ((utf8[0] & 0x80) == 0) {
@@ -359,6 +412,7 @@ FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *
 	return true;
 }
 
+/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 FLAC_API FLAC__bool FLAC__format_cuesheet_is_legal(const FLAC__StreamMetadata_CueSheet *cue_sheet, FLAC__bool check_cd_da_subset, const char **violation)
 {
 	unsigned i, j;
@@ -437,6 +491,31 @@ FLAC_API FLAC__bool FLAC__format_cuesheet_is_legal(const FLAC__StreamMetadata_Cu
 	return true;
 }
 
+/* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
+FLAC_API FLAC__bool FLAC__format_picture_is_legal(const FLAC__StreamMetadata_Picture *picture, const char **violation)
+{
+	char *p;
+	FLAC__byte *b;
+
+	for(p = picture->mime_type; *p; p++) {
+		if(*p < 0x20 || *p > 0x7e) {
+			if(violation) *violation = "MIME type string must contain only printable ASCII characters (0x20-0x7e)";
+			return false;
+		}
+	}
+
+	for(b = picture->description; *b; ) {
+		unsigned n = utf8len_(b);
+		if(n == 0) {
+			if(violation) *violation = "description string must be valid UTF-8";
+			return false;
+		}
+		b += n;
+	}
+
+	return true;
+}
+
 /*
  * These routines are private to libFLAC
  */
@@ -506,6 +585,7 @@ FLAC__bool FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_s
 			return false;
 		if(0 == (object->raw_bits = (unsigned*)realloc(object->raw_bits, sizeof(unsigned) << max_partition_order)))
 			return false;
+		memset(object->raw_bits, 0, sizeof(unsigned)*(1 << max_partition_order));
 		object->capacity_by_order = max_partition_order;
 	}
 
diff --git a/FLAC/ia32/bitreader_asm.nasm b/FLAC/ia32/bitreader_asm.nasm
new file mode 100644
index 0000000000..93c1196c26
--- /dev/null
+++ b/FLAC/ia32/bitreader_asm.nasm
@@ -0,0 +1,568 @@
+;  vim:filetype=nasm ts=8
+
+;  libFLAC - Free Lossless Audio Codec library
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;
+;  - Redistributions of source code must retain the above copyright
+;  notice, this list of conditions and the following disclaimer.
+;
+;  - Redistributions in binary form must reproduce the above copyright
+;  notice, this list of conditions and the following disclaimer in the
+;  documentation and/or other materials provided with the distribution.
+;
+;  - Neither the name of the Xiph.org Foundation nor the names of its
+;  contributors may be used to endorse or promote products derived from
+;  this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "ia32/nasm.h"
+
+	data_section
+
+cextern FLAC__crc16_table		; unsigned FLAC__crc16_table[256];
+cextern bitreader_read_from_client_	; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
+
+cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
+
+	code_section
+
+
+; **********************************************************************
+;
+; FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
+;
+; Some details like assertions and other checking are performed by the caller.
+	ALIGN 16
+cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap
+
+	;ASSERT(0 != br);
+	;ASSERT(0 != br->buffer);
+	; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion
+	;ASSERT(FLAC__BITS_PER_WORD == 32);
+	;ASSERT(parameter < 32);
+	; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it
+
+	;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
+	;; [esp + 16]	unsigned parameter
+	;; [esp + 12]	unsigned nvals
+	;; [esp + 8]	int vals[]
+	;; [esp + 4]	FLAC__BitReader *br
+	mov	eax, [esp + 12]		; if(nvals == 0)
+	test	eax, eax
+	ja	.nvals_gt_0
+	mov	eax, 1			;   return true;
+	ret
+
+.nvals_gt_0:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 4
+	;; [esp + 36]	unsigned parameter
+	;; [esp + 32]	unsigned nvals
+	;; [esp + 28]	int vals[]
+	;; [esp + 24]	FLAC__BitReader *br
+	;; [esp]	ucbits
+	mov	ebp, [esp + 24]		; ebp <- br == br->buffer
+	mov	esi, [ebp + 16]		; esi <- br->consumed_words (aka 'cwords' in the C version)
+	mov	ecx, [ebp + 20]		; ecx <- br->consumed_bits  (aka 'cbits'  in the C version)
+	xor	edi, edi		; edi <- 0  'uval'
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	;; [ebp]	br->buffer
+	;; [ebp + 8]	br->words
+	;; [ebp + 12]	br->bytes
+	;; [ebp + 16]	br->consumed_words
+	;; [ebp + 20]	br->consumed_bits
+	;; [ebp + 24]	br->read_crc
+	;; [ebp + 28]	br->crc16_align
+
+					; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+	mov	eax, [ebp + 8]		;   eax <- br->words
+	sub	eax, esi		;   eax <- br->words-cwords
+	shl	eax, 2			;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
+	add	eax, [ebp + 12]		;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
+	shl	eax, 3			;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
+	sub	eax, ecx		;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
+	mov	[esp], eax		;   ucbits <- eax
+
+	ALIGN 16
+.val_loop:				; while(1) {
+
+	;
+	; read unary part
+	;
+.unary_loop:				;   while(1) {
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	cmp	esi, [ebp + 8]		;     while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
+	jae	near .c1_next1
+.c1_loop:				;     {
+	mov	ebx, [ebp]
+	mov	eax, [ebx + 4*esi]	;       b = br->buffer[cwords]
+	mov	edx, eax		;       edx = br->buffer[cwords] (saved for later use)
+	shl	eax, cl 		;       b = br->buffer[cwords] << cbits
+	test	eax, eax		;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
+	jz	near .c1_next2		;       if(b) {
+	bsr	ebx, eax
+	not	ebx
+	and	ebx, 31			;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
+	add	ecx, ebx		;         cbits += i;
+	add	edi, ebx		;         uval += i;
+	add	ecx, byte 1		;         cbits++; /* skip over stop bit */
+	test	ecx, ~31
+	jz	near .break1 		;         if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
+					;           crc16_update_word_(br, br->buffer[cwords]);
+	push	edi			;		[need more registers]
+	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
+	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
+	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
+%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
+	mov	edi, _FLAC__crc16_table
+%else
+	mov	edi, FLAC__crc16_table
+%endif
+	;; eax (ax)	crc a.k.a. br->read_crc
+	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
+	;; ecx		br->crc16_align
+	;; edx		byteswapped brword to CRC
+	;; esi		cwords
+	;; edi		unsigned FLAC__crc16_table[]
+	;; ebp		br
+	test	ecx, ecx		;		switch(br->crc16_align) ...
+	jnz	.c0b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
+.c0b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+.c0b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shr	edx, 16
+.c0b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+.c0b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	movzx	eax, ax
+	mov	[ebp + 24], eax		;		br->read_crc <- crc
+	pop	edi
+
+	add	esi, byte 1		;           cwords++;
+	xor	ecx, ecx		;           cbits = 0;
+					;         }
+	jmp	near .break1		;         goto break1;
+	;; this section relocated out of the way for performance
+.c0b4:
+	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
+	cmp	ecx, 8
+	je	.c0b1
+	shr	edx, 16
+	cmp	ecx, 16
+	je	.c0b2
+	jmp	.c0b3
+
+	;; this section relocated out of the way for performance
+.c1b4:
+	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
+	cmp	ecx, 8
+	je	.c1b1
+	shr	edx, 16
+	cmp	ecx, 16
+	je	.c1b2
+	jmp	.c1b3
+
+.c1_next2:				;       } else {
+	;; ecx		cbits
+	;; edx		current brword 'b'
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	add	edi, 32
+	sub	edi, ecx		;         uval += FLAC__BITS_PER_WORD - cbits;
+					;         crc16_update_word_(br, br->buffer[cwords]);
+	push	edi			;		[need more registers]
+	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
+	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
+	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
+%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
+	mov	edi, _FLAC__crc16_table
+%else
+	mov	edi, FLAC__crc16_table
+%endif
+	;; eax (ax)	crc a.k.a. br->read_crc
+	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
+	;; ecx		br->crc16_align
+	;; edx		byteswapped brword to CRC
+	;; esi		cwords
+	;; edi		unsigned FLAC__crc16_table[]
+	;; ebp		br
+	test	ecx, ecx		;		switch(br->crc16_align) ...
+	jnz	.c1b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
+.c1b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+.c1b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shr	edx, 16
+.c1b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+.c1b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	movzx	eax, ax
+	mov	[ebp + 24], eax		;		br->read_crc <- crc
+	pop	edi
+
+	add	esi, byte 1		;         cwords++;
+	xor	ecx, ecx		;         cbits = 0;
+					;         /* didn't find stop bit yet, have to keep going... */
+					;       }
+
+	cmp	esi, [ebp + 8]		;     } while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
+	jb	near .c1_loop
+
+.c1_next1:
+	; at this point we've eaten up all the whole words; have to try
+	; reading through any tail bytes before calling the read callback.
+	; this is a repeat of the above logic adjusted for the fact we
+	; don't have a whole word.  note though if the client is feeding
+	; us data a byte at a time (unlikely), br->consumed_bits may not
+	; be zero.
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	mov	edx, [ebp + 12]		;     edx <- br->bytes
+	test	edx, edx
+	jz	.read1			;     if(br->bytes) {  [NOTE: this case is rare so it doesn't have to be all that fast ]
+	mov	ebx, [ebp]
+	shl	edx, 3			;       edx <- const unsigned end = br->bytes * 8;
+	mov	eax, [ebx + 4*esi]	;       b = br->buffer[cwords]
+	xchg	edx, ecx		;       [edx <- cbits , ecx <- end]
+	mov	ebx, 0xffffffff		;       ebx <- FLAC__WORD_ALL_ONES
+	shr	ebx, cl			;       ebx <- FLAC__WORD_ALL_ONES >> end
+	not	ebx			;       ebx <- ~(FLAC__WORD_ALL_ONES >> end)
+	xchg	edx, ecx		;       [edx <- end , ecx <- cbits]
+	and	eax, ebx		;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
+	shl	eax, cl 		;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
+	test	eax, eax		;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
+	jz	.c1_next3		;       if(b) {
+	bsr	ebx, eax
+	not	ebx
+	and	ebx, 31			;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
+	add	ecx, ebx		;         cbits += i;
+	add	edi, ebx		;         uval += i;
+	add	ecx, byte 1		;         cbits++; /* skip over stop bit */
+	jmp	short .break1 		;         goto break1;
+.c1_next3:				;       } else {
+	sub	edi, ecx
+	add	edi, edx		;         uval += end - cbits;
+	add	ecx, edx		;         cbits += end
+					;         /* didn't find stop bit yet, have to keep going... */
+					;       }
+					;     }
+.read1:
+	; flush registers and read; bitreader_read_from_client_() does
+	; not touch br->consumed_bits at all but we still need to set
+	; it in case it fails and we have to return false.
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	mov	[ebp + 16], esi		;     br->consumed_words = cwords;
+	mov	[ebp + 20], ecx		;     br->consumed_bits = cbits;
+	push	ecx			;     /* save */
+	push	ebp			;     /* push br argument */
+%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
+	call	_bitreader_read_from_client_
+%else
+	call	bitreader_read_from_client_
+%endif
+	pop	edx			;     /* discard, unused */
+	pop	ecx			;     /* restore */
+	mov	esi, [ebp + 16]		;     cwords = br->consumed_words;
+					;     ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+	mov	ebx, [ebp + 8]		;       ebx <- br->words
+	sub	ebx, esi		;       ebx <- br->words-cwords
+	shl	ebx, 2			;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
+	add	ebx, [ebp + 12]		;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
+	shl	ebx, 3			;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
+	sub	ebx, ecx		;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
+	add	ebx, edi		;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
+					;           + uval to offset our count by the # of unary bits already
+					;           consumed before the read, because we will add these back
+					;           in all at once at break1
+	mov	[esp], ebx		;       ucbits <- ebx
+	test	eax, eax		;     if(!bitreader_read_from_client_(br))
+	jnz	near .unary_loop
+	jmp	.end			;       return false; /* eax (the return value) is already 0 */
+					;   } /* end while(1) unary part */
+
+	ALIGN 16
+.break1:
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	;; [esp]	ucbits
+	sub	[esp], edi		;   ucbits -= uval;
+	sub	dword [esp], byte 1	;   ucbits--; /* account for stop bit */
+
+	;
+	; read binary part
+	;
+	mov	ebx, [esp + 36]		;   ebx <- parameter
+	test	ebx, ebx		;   if(parameter) {
+	jz	near .break2
+.read2:
+	cmp	[esp], ebx		;     while(ucbits < parameter) {
+	jae	.c2_next1
+	; flush registers and read; bitreader_read_from_client_() does
+	; not touch br->consumed_bits at all but we still need to set
+	; it in case it fails and we have to return false.
+	mov	[ebp + 16], esi		;       br->consumed_words = cwords;
+	mov	[ebp + 20], ecx		;       br->consumed_bits = cbits;
+	push	ecx			;       /* save */
+	push	ebp			;       /* push br argument */
+%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
+	call	_bitreader_read_from_client_
+%else
+	call	bitreader_read_from_client_
+%endif
+	pop	edx			;       /* discard, unused */
+	pop	ecx			;       /* restore */
+	mov	esi, [ebp + 16]		;       cwords = br->consumed_words;
+					;       ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
+	mov	edx, [ebp + 8]		;         edx <- br->words
+	sub	edx, esi		;         edx <- br->words-cwords
+	shl	edx, 2			;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
+	add	edx, [ebp + 12]		;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
+	shl	edx, 3			;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
+	sub	edx, ecx		;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
+	mov	[esp], edx		;         ucbits <- edx
+	test	eax, eax		;       if(!bitreader_read_from_client_(br))
+	jnz	.read2
+	jmp	.end			;         return false; /* eax (the return value) is already 0 */
+					;     }
+.c2_next1:
+	;; ebx		parameter
+	;; ecx		cbits
+	;; esi		cwords
+	;; edi		uval
+	;; ebp		br
+	;; [esp]	ucbits
+	cmp	esi, [ebp + 8]		;     if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
+	jae	near .c2_next2
+	test	ecx, ecx		;       if(cbits) {
+	jz	near .c2_next3		;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+	mov	eax, 32
+	mov	edx, [ebp]
+	sub	eax, ecx		;         const unsigned n = FLAC__BITS_PER_WORD - cbits;
+	mov	edx, [edx + 4*esi]	;         const brword word = br->buffer[cwords];
+	cmp	ebx, eax		;         if(parameter < n) {
+	jae	.c2_next4
+					;           uval <<= parameter;
+					;           uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
+	shl	edx, cl
+	xchg	ebx, ecx
+	shld	edi, edx, cl
+	add	ebx, ecx		;           cbits += parameter;
+	xchg	ebx, ecx		;           ebx <- parameter, ecx <- cbits
+	jmp	.break2			;           goto break2;
+					;         }
+.c2_next4:
+					;         uval <<= n;
+					;         uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
+%if 1
+	rol	edx, cl			;            @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
+					;            @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
+%else
+	shl	edx, cl
+%endif
+	xchg	eax, ecx
+	shld	edi, edx, cl
+	xchg	eax, ecx
+%if 1
+	ror	edx, cl			;            restored.
+%else
+	mov	edx, [ebp]
+	mov	edx, [edx + 4*esi]
+%endif
+					;         crc16_update_word_(br, br->buffer[cwords]);
+	push	edi			;		[need more registers]
+	push	ebx			;		[need more registers]
+	push	eax			;		[need more registers]
+	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
+	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
+	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
+%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
+	mov	edi, _FLAC__crc16_table
+%else
+	mov	edi, FLAC__crc16_table
+%endif
+	;; eax (ax)	crc a.k.a. br->read_crc
+	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
+	;; ecx		br->crc16_align
+	;; edx		byteswapped brword to CRC
+	;; esi		cwords
+	;; edi		unsigned FLAC__crc16_table[]
+	;; ebp		br
+	test	ecx, ecx		;		switch(br->crc16_align) ...
+	jnz	.c2b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
+.c2b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
+.c2b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
+	shr	edx, 16
+.c2b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
+	movzx	ebx, dl
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
+.c2b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
+	movzx	ebx, dh
+	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	shl	eax, 8			;		ax <- (crc<<8)
+	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
+	movzx	eax, ax
+	mov	[ebp + 24], eax		;		br->read_crc <- crc
+	pop	eax
+	pop	ebx
+	pop	edi
+	add	esi, byte 1		;         cwords++;
+	mov	ecx, ebx
+	sub	ecx, eax		;         cbits = parameter - n;
+	jz	.break2			;         if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
+					;           uval <<= cbits;
+					;           uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
+	mov	eax, [ebp]
+	mov	eax, [eax + 4*esi]
+	shld	edi, eax, cl
+					;         }
+	jmp	.break2			;         goto break2;
+
+	;; this section relocated out of the way for performance
+.c2b4:
+	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
+	cmp	ecx, 8
+	je	.c2b1
+	shr	edx, 16
+	cmp	ecx, 16
+	je	.c2b2
+	jmp	.c2b3
+
+.c2_next3:				;       } else {
+	mov	ecx, ebx		;         cbits = parameter;
+					;         uval <<= cbits;
+					;         uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
+	mov	eax, [ebp]
+	mov	eax, [eax + 4*esi]
+	shld	edi, eax, cl
+	jmp	.break2			;         goto break2;
+					;       }
+.c2_next2:				;     } else {
+	; in this case we're starting our read at a partial tail word;
+	; the reader has guaranteed that we have at least 'parameter'
+	; bits available to read, which makes this case simpler.
+					;       uval <<= parameter;
+					;       if(cbits) {
+					;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
+					;         uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
+					;         cbits += parameter;
+					;         goto break2;
+					;       } else {
+					;         cbits = parameter;
+					;         uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
+					;         goto break2;
+					;       }
+					;       the above is much shorter in assembly:
+	mov	eax, [ebp]
+	mov	eax, [eax + 4*esi]	;       eax <- br->buffer[cwords]
+	shl	eax, cl			;       eax <- br->buffer[cwords] << cbits
+	add	ecx, ebx		;       cbits += parameter
+	xchg	ebx, ecx		;       ebx <- cbits, ecx <- parameter
+	shld	edi, eax, cl		;       uval <<= parameter <<< 'parameter' bits of tail word
+	xchg	ebx, ecx		;       ebx <- parameter, ecx <- cbits
+					;     }
+					;   }
+.break2:
+	sub	[esp], ebx		;   ucbits -= parameter;
+
+	;
+	; compose the value
+	;
+	mov	ebx, [esp + 28]		;   ebx <- vals
+	mov	edx, edi		;   edx <- uval
+	and	edi, 1			;   edi <- uval & 1
+	shr	edx, 1			;   edx <- uval >> 1
+	neg	edi			;   edi <- -(int)(uval & 1)
+	xor	edx, edi		;   edx <- (uval >> 1 ^ -(int)(uval & 1))
+	mov	[ebx], edx		;   *vals <- edx
+	sub	dword [esp + 32], byte 1	;   --nvals;
+	jz	.finished		;   if(nvals == 0) /* jump to finish */
+	xor	edi, edi		;   uval = 0;
+	add	dword [esp + 28], 4	;   ++vals
+	jmp	.val_loop		; }
+
+.finished:
+	mov	[ebp + 16], esi		; br->consumed_words = cwords;
+	mov	[ebp + 20], ecx		; br->consumed_bits = cbits;
+	mov	eax, 1
+.end:
+	add	esp, 4
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+	section .note.GNU-stack noalloc
+%endif
diff --git a/FLAC/ia32/cpu_asm.nasm b/FLAC/ia32/cpu_asm.nasm
index b3165c3f5b..642ca19eaa 100644
--- a/FLAC/ia32/cpu_asm.nasm
+++ b/FLAC/ia32/cpu_asm.nasm
@@ -1,100 +1,121 @@
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "ia32/nasm.h"
-
-	data_section
-
-cglobal FLAC__cpu_info_asm_ia32
-cglobal FLAC__cpu_info_extended_amd_asm_ia32
-cglobal FLAC__cpu_info_sse_test_asm_ia32
-
-	code_section
-
-; **********************************************************************
-;
-
-have_cpuid:
-	pushfd
-	pop	eax
-	mov	edx, eax
-	xor	eax, 0x00200000
-	push	eax
-	popfd
-	pushfd
-	pop	eax
-	cmp	eax, edx
-	jz	.no_cpuid
-	mov	eax, 1
-	jmp	.end
-.no_cpuid:
-	xor	eax, eax
-.end:
-	ret
-
-cident FLAC__cpu_info_asm_ia32
-	push	ebx
-	call	have_cpuid
-	test	eax, eax
-	jz	.no_cpuid
-	mov	eax, 1
-	cpuid
-	mov	eax, edx
-	jmp	.end
-.no_cpuid:
-	xor	eax, eax
-.end
-	pop	ebx
-	ret
-
-cident FLAC__cpu_info_extended_amd_asm_ia32
-	push	ebx
-	call	have_cpuid
-	test	eax, eax
-	jz	.no_cpuid
-	mov	eax, 0x80000000
-	cpuid
-	cmp	eax, 0x80000001
-	jb	.no_cpuid
-	mov	eax, 0x80000001
-	cpuid
-	mov	eax, edx
-	jmp	.end
-.no_cpuid
-	xor	eax, eax
-.end
-	pop	ebx
-	ret
-
-cident FLAC__cpu_info_sse_test_asm_ia32
-	xorps	xmm0, xmm0
-	pop	ebx
-
-end
+;  vim:filetype=nasm ts=8
+
+;  libFLAC - Free Lossless Audio Codec library
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;
+;  - Redistributions of source code must retain the above copyright
+;  notice, this list of conditions and the following disclaimer.
+;
+;  - Redistributions in binary form must reproduce the above copyright
+;  notice, this list of conditions and the following disclaimer in the
+;  documentation and/or other materials provided with the distribution.
+;
+;  - Neither the name of the Xiph.org Foundation nor the names of its
+;  contributors may be used to endorse or promote products derived from
+;  this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "ia32/nasm.h"
+
+	data_section
+
+cglobal FLAC__cpu_have_cpuid_asm_ia32
+cglobal FLAC__cpu_info_asm_ia32
+cglobal FLAC__cpu_info_extended_amd_asm_ia32
+
+	code_section
+
+; **********************************************************************
+;
+; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
+;
+
+cident FLAC__cpu_have_cpuid_asm_ia32
+	push	ebx			; ebx is not modified below; it is saved and restored anyway
+	pushfd
+	pop	eax			; eax <- current EFLAGS
+	mov	edx, eax		; edx <- copy of the original EFLAGS for the comparison below
+	xor	eax, 0x00200000		; flip bit 21 (the EFLAGS ID flag)
+	push	eax
+	popfd				; try to write the modified value back into EFLAGS
+	pushfd
+	pop	eax			; eax <- EFLAGS as read back after the write
+	cmp	eax, edx		; equal means the flipped bit did not stick
+	jz	.no_cpuid
+	mov	eax, 1			; bit could be toggled: return 1
+	jmp	.end
+.no_cpuid:
+	xor	eax, eax		; bit could not be toggled: return 0
+.end:
+	pop	ebx
+	ret
+
+; **********************************************************************
+;
+; void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
+;
+
+cident FLAC__cpu_info_asm_ia32
+	;[esp + 8] == flags_edx
+	;[esp + 12] == flags_ecx
+
+	push	ebx			; ebx is used as scratch below; the offsets above are valid after this push
+	call	FLAC__cpu_have_cpuid_asm_ia32
+	test	eax, eax		; eax == 0 means cpuid is not available
+	jz	.no_cpuid
+	mov	eax, 1			; select cpuid function 1
+	cpuid
+	mov	ebx, [esp + 8]		; ebx <- flags_edx pointer
+	mov	[ebx], edx		; *flags_edx <- edx as returned by cpuid
+	mov	ebx, [esp + 12]		; ebx <- flags_ecx pointer
+	mov	[ebx], ecx		; *flags_ecx <- ecx as returned by cpuid
+	jmp	.end
+.no_cpuid:
+	xor	eax, eax
+	mov	ebx, [esp + 8]		; ebx <- flags_edx pointer
+	mov	[ebx], eax		; *flags_edx <- 0
+	mov	ebx, [esp + 12]		; ebx <- flags_ecx pointer
+	mov	[ebx], eax		; *flags_ecx <- 0
+.end:
+	pop	ebx
+	ret
+
+cident FLAC__cpu_info_extended_amd_asm_ia32
+	push	ebx			; saved here and restored before ret; cpuid below overwrites ebx
+	call	FLAC__cpu_have_cpuid_asm_ia32
+	test	eax, eax		; eax == 0 means cpuid is not available
+	jz	.no_cpuid
+	mov	eax, 0x80000000		; cpuid function 0x80000000: eax returns the highest extended function number
+	cpuid
+	cmp	eax, 0x80000001		; is extended function 0x80000001 implemented?
+	jb	.no_cpuid		; no: report 0
+	mov	eax, 0x80000001		; select cpuid function 0x80000001
+	cpuid
+	mov	eax, edx		; result: edx as returned by cpuid function 0x80000001
+	jmp	.end
+.no_cpuid:
+	xor	eax, eax		; result: 0
+.end:
+	pop	ebx
+	ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+       section .note.GNU-stack noalloc
+%endif
diff --git a/FLAC/ia32/fixed_asm.nasm b/FLAC/ia32/fixed_asm.nasm
index c7ba38fc50..f639494b9b 100644
--- a/FLAC/ia32/fixed_asm.nasm
+++ b/FLAC/ia32/fixed_asm.nasm
@@ -1,306 +1,312 @@
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "ia32/nasm.h"
-
-	data_section
-
-cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
-	code_section
-
-; **********************************************************************
-;
-; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
-; {
-; 	FLAC__int32 last_error_0 = data[-1];
-; 	FLAC__int32 last_error_1 = data[-1] - data[-2];
-; 	FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
-; 	FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
-; 	FLAC__int32 error, save;
-; 	FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
-; 	unsigned i, order;
-;
-; 	for(i = 0; i < data_len; i++) {
-; 		error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
-; 		error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
-; 		error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
-; 		error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
-; 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
-; 	}
-;
-; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; 		order = 0;
-; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; 		order = 1;
-; 	else if(total_error_2 < min(total_error_3, total_error_4))
-; 		order = 2;
-; 	else if(total_error_3 < total_error_4)
-; 		order = 3;
-; 	else
-; 		order = 4;
-;
-; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-;
-; 	return order;
-; }
-	ALIGN 16
-cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
-	; esp + 36 == data[]
-	; esp + 40 == data_len
-	; esp + 44 == residual_bits_per_sample[]
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-	sub	esp, byte 16
-	; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
-
-	; ebx == &data[i]
-	; ecx == loop counter (i)
-	; ebp == order
-	; mm0 == total_error_1:total_error_0
-	; mm1 == total_error_2:total_error_3
-	; mm2 == :total_error_4
-	; mm3 == last_error_1:last_error_0
-	; mm4 == last_error_2:last_error_3
-
-	mov	ecx, [esp + 40]			; ecx = data_len
-	test	ecx, ecx
-	jz	near .data_len_is_0
-
-	mov	ebx, [esp + 36]			; ebx = data[]
-	movd	mm3, [ebx - 4]			; mm3 = 0:last_error_0
-	movd	mm2, [ebx - 8]			; mm2 = 0:data[-2]
-	movd	mm1, [ebx - 12]			; mm1 = 0:data[-3]
-	movd	mm0, [ebx - 16]			; mm0 = 0:data[-4]
-	movq	mm5, mm3			; mm5 = 0:last_error_0
-	psubd	mm5, mm2			; mm5 = 0:last_error_1
-	punpckldq	mm3, mm5		; mm3 = last_error_1:last_error_0
-	psubd	mm2, mm1			; mm2 = 0:data[-2] - data[-3]
-	psubd	mm5, mm2			; mm5 = 0:last_error_2
-	movq	mm4, mm5			; mm4 = 0:last_error_2
-	psubd	mm4, mm2			; mm4 = 0:last_error_2 - (data[-2] - data[-3])
-	paddd	mm4, mm1			; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
-	psubd	mm4, mm0			; mm4 = 0:last_error_3
-	punpckldq	mm4, mm5		; mm4 = last_error_2:last_error_3
-	pxor	mm0, mm0			; mm0 = total_error_1:total_error_0
-	pxor	mm1, mm1			; mm1 = total_error_2:total_error_3
-	pxor	mm2, mm2			; mm2 = 0:total_error_4
-
-	ALIGN 16
-.loop:
-	movd	mm7, [ebx]			; mm7 = 0:error_0
-	add	ebx, byte 4
-	movq	mm6, mm7			; mm6 = 0:error_0
-	psubd	mm7, mm3			; mm7 = :error_1
-	punpckldq	mm6, mm7		; mm6 = error_1:error_0
-	movq	mm5, mm6			; mm5 = error_1:error_0
-	movq	mm7, mm6			; mm7 = error_1:error_0
-	psubd	mm5, mm3			; mm5 = error_2:
-	movq	mm3, mm6			; mm3 = error_1:error_0	
-	psrad	mm6, 31
-	pxor	mm7, mm6
-	psubd	mm7, mm6			; mm7 = abs(error_1):abs(error_0)
-	paddd	mm0, mm7			; mm0 = total_error_1:total_error_0
-	movq	mm6, mm5			; mm6 = error_2:
-	psubd	mm5, mm4			; mm5 = error_3:
-	punpckhdq	mm5, mm6		; mm5 = error_2:error_3
-	movq	mm7, mm5			; mm7 = error_2:error_3
-	movq	mm6, mm5			; mm6 = error_2:error_3
-	psubd	mm5, mm4			; mm5 = :error_4
-	movq	mm4, mm6			; mm4 = error_2:error_3
-	psrad	mm6, 31
-	pxor	mm7, mm6
-	psubd	mm7, mm6			; mm7 = abs(error_2):abs(error_3)
-	paddd	mm1, mm7			; mm1 = total_error_2:total_error_3
-	movq	mm6, mm5			; mm6 = :error_4
-	psrad	mm5, 31
-	pxor	mm6, mm5
-	psubd	mm6, mm5			; mm6 = :abs(error_4)
-	paddd	mm2, mm6			; mm2 = :total_error_4
-	
-	dec	ecx
-	jnz	short .loop
-
-; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; 		order = 0;
-; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; 		order = 1;
-; 	else if(total_error_2 < min(total_error_3, total_error_4))
-; 		order = 2;
-; 	else if(total_error_3 < total_error_4)
-; 		order = 3;
-; 	else
-; 		order = 4;
-	movq	mm3, mm0			; mm3 = total_error_1:total_error_0
-	movd	edi, mm2			; edi = total_error_4
-	movd	esi, mm1			; esi = total_error_3
-	movd	eax, mm0			; eax = total_error_0
-	punpckhdq	mm1, mm1		; mm1 = total_error_2:total_error_2
-	punpckhdq	mm3, mm3		; mm3 = total_error_1:total_error_1
-	movd	edx, mm1			; edx = total_error_2
-	movd	ecx, mm3			; ecx = total_error_1
-
-	xor	ebx, ebx
-	xor	ebp, ebp
-	inc	ebx
-	cmp	ecx, eax
-	cmovb	eax, ecx			; eax = min(total_error_0, total_error_1)
-	cmovbe	ebp, ebx
-	inc	ebx
-	cmp	edx, eax
-	cmovb	eax, edx			; eax = min(total_error_0, total_error_1, total_error_2)
-	cmovbe	ebp, ebx
-	inc	ebx
-	cmp	esi, eax
-	cmovb	eax, esi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
-	cmovbe	ebp, ebx
-	inc	ebx
-	cmp	edi, eax
-	cmovb	eax, edi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
-	cmovbe	ebp, ebx
-	movd	ebx, mm0			; ebx = total_error_0
-	emms
-
-	; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	xor	eax, eax
-	fild	dword [esp + 40]		; ST = data_len (NOTE: assumes data_len is <2gigs)
-.rbps_0:
-	test	ebx, ebx
-	jz	.total_error_0_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], ebx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_0
-	mov	ebx, [esp + 44]
-	fild	qword [esp]			; ST = total_error_0 1.0 data_len
-	fdiv	st2				; ST = total_error_0/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_0/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_0/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_0/data_len) data_len
-	fstp	dword [ebx]			; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len)   ST = data_len
-	jmp	short .rbps_1
-.total_error_0_is_0:
-	mov	ebx, [esp + 44]
-	mov	[ebx], eax			; residual_bits_per_sample[0] = 0.0
-.rbps_1:
-	test	ecx, ecx
-	jz	.total_error_1_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], ecx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_1
-	fild	qword [esp]			; ST = total_error_1 1.0 data_len
-	fdiv	st2				; ST = total_error_1/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_1/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_1/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_1/data_len) data_len
-	fstp	dword [ebx + 4]			; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len)   ST = data_len
-	jmp	short .rbps_2
-.total_error_1_is_0:
-	mov	[ebx + 4], eax			; residual_bits_per_sample[1] = 0.0
-.rbps_2:
-	test	edx, edx
-	jz	.total_error_2_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], edx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_2
-	fild	qword [esp]			; ST = total_error_2 1.0 data_len
-	fdiv	st2				; ST = total_error_2/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_2/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_2/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_2/data_len) data_len
-	fstp	dword [ebx + 8]			; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len)   ST = data_len
-	jmp	short .rbps_3
-.total_error_2_is_0:
-	mov	[ebx + 8], eax			; residual_bits_per_sample[2] = 0.0
-.rbps_3:
-	test	esi, esi
-	jz	.total_error_3_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], esi
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_3
-	fild	qword [esp]			; ST = total_error_3 1.0 data_len
-	fdiv	st2				; ST = total_error_3/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_3/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_3/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_3/data_len) data_len
-	fstp	dword [ebx + 12]		; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len)   ST = data_len
-	jmp	short .rbps_4
-.total_error_3_is_0:
-	mov	[ebx + 12], eax			; residual_bits_per_sample[3] = 0.0
-.rbps_4:
-	test	edi, edi
-	jz	.total_error_4_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], edi
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_4
-	fild	qword [esp]			; ST = total_error_4 1.0 data_len
-	fdiv	st2				; ST = total_error_4/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_4/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_4/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_4/data_len) data_len
-	fstp	dword [ebx + 16]		; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len)   ST = data_len
-	jmp	short .rbps_end
-.total_error_4_is_0:
-	mov	[ebx + 16], eax			; residual_bits_per_sample[4] = 0.0
-.rbps_end:
-	fstp	st0				; ST = [empty]
-	jmp	short .end
-.data_len_is_0:
-	; data_len == 0, so residual_bits_per_sample[*] = 0.0
-	xor	ebp, ebp
-	mov	edi, [esp + 44]
-	mov	[edi], ebp
-	mov	[edi + 4], ebp
-	mov	[edi + 8], ebp
-	mov	[edi + 12], ebp
-	mov	[edi + 16], ebp
-	add	ebp, byte 4			; order = 4
-
-.end:
-	mov	eax, ebp			; return order
-	add	esp, byte 16
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-end
+;  vim:filetype=nasm ts=8
+
+;  libFLAC - Free Lossless Audio Codec library
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;
+;  - Redistributions of source code must retain the above copyright
+;  notice, this list of conditions and the following disclaimer.
+;
+;  - Redistributions in binary form must reproduce the above copyright
+;  notice, this list of conditions and the following disclaimer in the
+;  documentation and/or other materials provided with the distribution.
+;
+;  - Neither the name of the Xiph.org Foundation nor the names of its
+;  contributors may be used to endorse or promote products derived from
+;  this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "ia32/nasm.h"
+
+	data_section
+
+cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
+
+	code_section
+
+; **********************************************************************
+;
+; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+; {
+; 	FLAC__int32 last_error_0 = data[-1];
+; 	FLAC__int32 last_error_1 = data[-1] - data[-2];
+; 	FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
+; 	FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
+; 	FLAC__int32 error, save;
+; 	FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
+; 	unsigned i, order;
+;
+; 	for(i = 0; i < data_len; i++) {
+; 		error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
+; 		error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
+; 		error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
+; 		error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
+; 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
+; 	}
+;
+; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+; 		order = 0;
+; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+; 		order = 1;
+; 	else if(total_error_2 < min(total_error_3, total_error_4))
+; 		order = 2;
+; 	else if(total_error_3 < total_error_4)
+; 		order = 3;
+; 	else
+; 		order = 4;
+;
+; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
+; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
+; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
+; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
+; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+;
+; 	return order;
+; }
+	ALIGN 16
+cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
+	; Sums |error_k| for fixed-predictor orders k = 0..4 over data[0..data_len-1] (seeded from data[-4..-1]), stores one log2-based estimate per order in residual_bits_per_sample[], and returns in eax the order with the smallest total (ties go to the higher order).
+	; esp + 36 == data[]
+	; esp + 40 == data_len
+	; esp + 44 == residual_bits_per_sample[]
+	; (offsets above are relative to esp after the prologue below: return address + 4 pushed registers + 16 bytes of scratch = 36)
+	push	ebp				; save the registers this routine overwrites; restored in reverse order at .end
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, byte 16			; reserve 16 bytes of scratch on the stack
+	; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
+
+	; ebx == &data[i]
+	; ecx == loop counter (i)
+	; ebp == order
+	; mm0 == total_error_1:total_error_0 (32-bit accumulators, updated with paddd)
+	; mm1 == total_error_2:total_error_3
+	; mm2 == :total_error_4 (only the low dword is read back after the loop)
+	; mm3 == last_error_1:last_error_0
+	; mm4 == last_error_2:last_error_3
+
+	mov	ecx, [esp + 40]			; ecx = data_len
+	test	ecx, ecx			; data_len == 0?
+	jz	near .data_len_is_0		; yes: skip the loop, zero all outputs and return order 4
+
+	mov	ebx, [esp + 36]			; ebx = data[]
+	movd	mm3, [ebx - 4]			; mm3 = 0:last_error_0
+	movd	mm2, [ebx - 8]			; mm2 = 0:data[-2]
+	movd	mm1, [ebx - 12]			; mm1 = 0:data[-3]
+	movd	mm0, [ebx - 16]			; mm0 = 0:data[-4]
+	movq	mm5, mm3			; mm5 = 0:last_error_0
+	psubd	mm5, mm2			; mm5 = 0:last_error_1
+	punpckldq	mm3, mm5		; mm3 = last_error_1:last_error_0
+	psubd	mm2, mm1			; mm2 = 0:data[-2] - data[-3]
+	psubd	mm5, mm2			; mm5 = 0:last_error_2
+	movq	mm4, mm5			; mm4 = 0:last_error_2
+	psubd	mm4, mm2			; mm4 = 0:last_error_2 - (data[-2] - data[-3])
+	paddd	mm4, mm1			; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
+	psubd	mm4, mm0			; mm4 = 0:last_error_3
+	punpckldq	mm4, mm5		; mm4 = last_error_2:last_error_3
+	pxor	mm0, mm0			; mm0 = total_error_1:total_error_0
+	pxor	mm1, mm1			; mm1 = total_error_2:total_error_3
+	pxor	mm2, mm2			; mm2 = 0:total_error_4
+
+	ALIGN 16
+.loop:
+	movd	mm7, [ebx]			; mm7 = 0:error_0
+	add	ebx, byte 4			; advance to the next sample
+	movq	mm6, mm7			; mm6 = 0:error_0
+	psubd	mm7, mm3			; mm7 = :error_1
+	punpckldq	mm6, mm7		; mm6 = error_1:error_0
+	movq	mm5, mm6			; mm5 = error_1:error_0
+	movq	mm7, mm6			; mm7 = error_1:error_0
+	psubd	mm5, mm3			; mm5 = error_2:
+	movq	mm3, mm6			; mm3 = error_1:error_0 (becomes last_error_1:last_error_0 for the next iteration)
+	psrad	mm6, 31				; mm6 = per-dword sign mask (-1 where negative, 0 otherwise)
+	pxor	mm7, mm6			; invert the negative dwords; the psubd below adds 1 to finish the negation
+	psubd	mm7, mm6			; mm7 = abs(error_1):abs(error_0)
+	paddd	mm0, mm7			; mm0 = total_error_1:total_error_0
+	movq	mm6, mm5			; mm6 = error_2:
+	psubd	mm5, mm4			; mm5 = error_3:
+	punpckhdq	mm5, mm6		; mm5 = error_2:error_3
+	movq	mm7, mm5			; mm7 = error_2:error_3
+	movq	mm6, mm5			; mm6 = error_2:error_3
+	psubd	mm5, mm4			; mm5 = :error_4
+	movq	mm4, mm6			; mm4 = error_2:error_3 (becomes last_error_2:last_error_3 for the next iteration)
+	psrad	mm6, 31				; mm6 = per-dword sign mask
+	pxor	mm7, mm6			; invert the negative dwords
+	psubd	mm7, mm6			; mm7 = abs(error_2):abs(error_3)
+	paddd	mm1, mm7			; mm1 = total_error_2:total_error_3
+	movq	mm6, mm5			; mm6 = :error_4
+	psrad	mm5, 31				; mm5 = per-dword sign mask
+	pxor	mm6, mm5			; invert the negative dwords
+	psubd	mm6, mm5			; mm6 = :abs(error_4)
+	paddd	mm2, mm6			; mm2 = :total_error_4
+	
+	dec	ecx				; one sample consumed
+	jnz	short .loop			; loop until all data_len samples are processed
+
+; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
+; 		order = 0;
+; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
+; 		order = 1;
+; 	else if(total_error_2 < min(total_error_3, total_error_4))
+; 		order = 2;
+; 	else if(total_error_3 < total_error_4)
+; 		order = 3;
+; 	else
+; 		order = 4;
+	movq	mm3, mm0			; mm3 = total_error_1:total_error_0
+	movd	edi, mm2			; edi = total_error_4
+	movd	esi, mm1			; esi = total_error_3
+	movd	eax, mm0			; eax = total_error_0
+	punpckhdq	mm1, mm1		; mm1 = total_error_2:total_error_2
+	punpckhdq	mm3, mm3		; mm3 = total_error_1:total_error_1
+	movd	edx, mm1			; edx = total_error_2
+	movd	ecx, mm3			; ecx = total_error_1
+
+	xor	ebx, ebx			; ebx = candidate order, incremented before each comparison
+	xor	ebp, ebp			; ebp = order = 0
+	inc	ebx				; candidate = 1
+	cmp	ecx, eax			; total_error_1 vs running minimum (unsigned); both cmovs below test these flags
+	cmovb	eax, ecx			; eax = min(total_error_0, total_error_1)
+	cmovbe	ebp, ebx			; order = 1 if total_error_1 <= previous minimum (ties pick the higher order)
+	inc	ebx				; candidate = 2
+	cmp	edx, eax			; total_error_2 vs running minimum
+	cmovb	eax, edx			; eax = min(total_error_0, total_error_1, total_error_2)
+	cmovbe	ebp, ebx			; order = 2 if total_error_2 <= previous minimum
+	inc	ebx				; candidate = 3
+	cmp	esi, eax			; total_error_3 vs running minimum
+	cmovb	eax, esi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
+	cmovbe	ebp, ebx			; order = 3 if total_error_3 <= previous minimum
+	inc	ebx				; candidate = 4
+	cmp	edi, eax			; total_error_4 vs running minimum
+	cmovb	eax, edi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
+	cmovbe	ebp, ebx			; order = 4 if total_error_4 <= previous minimum
+	movd	ebx, mm0			; ebx = total_error_0
+	emms					; done with MMX; clear its state before the x87 instructions below
+
+	; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	xor	eax, eax			; eax = 0: high dword of each 64-bit temp, and the bit pattern stored for 0.0
+	fild	dword [esp + 40]		; ST = data_len (NOTE: assumes data_len is <2gigs; fild dword loads a signed 32-bit value)
+.rbps_0:
+	test	ebx, ebx			; total_error_0 == 0?
+	jz	.total_error_0_is_0		; yes: store 0.0 instead of taking a logarithm
+	fld1					; ST = 1.0 data_len
+	mov	[esp], ebx			; low dword of the 64-bit temp
+	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_0
+	mov	ebx, [esp + 44]			; ebx = residual_bits_per_sample[] (reused by all later stores)
+	fild	qword [esp]			; ST = total_error_0 1.0 data_len
+	fdiv	st2				; ST = total_error_0/data_len 1.0 data_len
+	fldln2					; ST = ln2 total_error_0/data_len 1.0 data_len
+	fmulp	st1				; ST = ln2*total_error_0/data_len 1.0 data_len
+	fyl2x					; ST = log2(ln2*total_error_0/data_len) data_len
+	fstp	dword [ebx]			; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len)   ST = data_len
+	jmp	short .rbps_1
+.total_error_0_is_0:
+	mov	ebx, [esp + 44]			; ebx = residual_bits_per_sample[] (reused by all later stores)
+	mov	[ebx], eax			; residual_bits_per_sample[0] = 0.0
+.rbps_1:
+	test	ecx, ecx			; total_error_1 == 0?
+	jz	.total_error_1_is_0		; yes: store 0.0
+	fld1					; ST = 1.0 data_len
+	mov	[esp], ecx			; low dword of the 64-bit temp
+	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_1
+	fild	qword [esp]			; ST = total_error_1 1.0 data_len
+	fdiv	st2				; ST = total_error_1/data_len 1.0 data_len
+	fldln2					; ST = ln2 total_error_1/data_len 1.0 data_len
+	fmulp	st1				; ST = ln2*total_error_1/data_len 1.0 data_len
+	fyl2x					; ST = log2(ln2*total_error_1/data_len) data_len
+	fstp	dword [ebx + 4]			; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len)   ST = data_len
+	jmp	short .rbps_2
+.total_error_1_is_0:
+	mov	[ebx + 4], eax			; residual_bits_per_sample[1] = 0.0
+.rbps_2:
+	test	edx, edx			; total_error_2 == 0?
+	jz	.total_error_2_is_0		; yes: store 0.0
+	fld1					; ST = 1.0 data_len
+	mov	[esp], edx			; low dword of the 64-bit temp
+	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_2
+	fild	qword [esp]			; ST = total_error_2 1.0 data_len
+	fdiv	st2				; ST = total_error_2/data_len 1.0 data_len
+	fldln2					; ST = ln2 total_error_2/data_len 1.0 data_len
+	fmulp	st1				; ST = ln2*total_error_2/data_len 1.0 data_len
+	fyl2x					; ST = log2(ln2*total_error_2/data_len) data_len
+	fstp	dword [ebx + 8]			; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len)   ST = data_len
+	jmp	short .rbps_3
+.total_error_2_is_0:
+	mov	[ebx + 8], eax			; residual_bits_per_sample[2] = 0.0
+.rbps_3:
+	test	esi, esi			; total_error_3 == 0?
+	jz	.total_error_3_is_0		; yes: store 0.0
+	fld1					; ST = 1.0 data_len
+	mov	[esp], esi			; low dword of the 64-bit temp
+	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_3
+	fild	qword [esp]			; ST = total_error_3 1.0 data_len
+	fdiv	st2				; ST = total_error_3/data_len 1.0 data_len
+	fldln2					; ST = ln2 total_error_3/data_len 1.0 data_len
+	fmulp	st1				; ST = ln2*total_error_3/data_len 1.0 data_len
+	fyl2x					; ST = log2(ln2*total_error_3/data_len) data_len
+	fstp	dword [ebx + 12]		; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len)   ST = data_len
+	jmp	short .rbps_4
+.total_error_3_is_0:
+	mov	[ebx + 12], eax			; residual_bits_per_sample[3] = 0.0
+.rbps_4:
+	test	edi, edi			; total_error_4 == 0?
+	jz	.total_error_4_is_0		; yes: store 0.0
+	fld1					; ST = 1.0 data_len
+	mov	[esp], edi			; low dword of the 64-bit temp
+	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_4
+	fild	qword [esp]			; ST = total_error_4 1.0 data_len
+	fdiv	st2				; ST = total_error_4/data_len 1.0 data_len
+	fldln2					; ST = ln2 total_error_4/data_len 1.0 data_len
+	fmulp	st1				; ST = ln2*total_error_4/data_len 1.0 data_len
+	fyl2x					; ST = log2(ln2*total_error_4/data_len) data_len
+	fstp	dword [ebx + 16]		; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len)   ST = data_len
+	jmp	short .rbps_end
+.total_error_4_is_0:
+	mov	[ebx + 16], eax			; residual_bits_per_sample[4] = 0.0
+.rbps_end:
+	fstp	st0				; ST = [empty]
+	jmp	short .end			; ebp already holds the selected order
+.data_len_is_0:
+	; data_len == 0, so residual_bits_per_sample[*] = 0.0
+	xor	ebp, ebp			; ebp = 0, the value written for each 0.0 below
+	mov	edi, [esp + 44]			; edi = residual_bits_per_sample[]
+	mov	[edi], ebp			; residual_bits_per_sample[0] = 0.0
+	mov	[edi + 4], ebp			; residual_bits_per_sample[1] = 0.0
+	mov	[edi + 8], ebp			; residual_bits_per_sample[2] = 0.0
+	mov	[edi + 12], ebp			; residual_bits_per_sample[3] = 0.0
+	mov	[edi + 16], ebp			; residual_bits_per_sample[4] = 0.0
+	add	ebp, byte 4			; order = 4
+
+.end:
+	mov	eax, ebp			; return order
+	add	esp, byte 16			; release the 16 bytes of scratch
+	pop	edi				; restore saved registers in reverse push order
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+end
+; When OBJ_FORMAT_elf is defined, declare a non-allocated .note.GNU-stack section.
+%ifdef OBJ_FORMAT_elf
+       section .note.GNU-stack noalloc
+%endif
diff --git a/FLAC/ia32/lpc_asm.nasm b/FLAC/ia32/lpc_asm.nasm
index 16e36d3888..e8d02e5be2 100644
--- a/FLAC/ia32/lpc_asm.nasm
+++ b/FLAC/ia32/lpc_asm.nasm
@@ -1,1503 +1,1511 @@
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "ia32/nasm.h"
-
-	data_section
-
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
-cglobal FLAC__lpc_restore_signal_asm_ia32
-cglobal FLAC__lpc_restore_signal_asm_ia32_mmx
-
-	code_section
-
-; **********************************************************************
-;
-; void FLAC__lpc_compute_autocorrelation_asm(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-; {
-;	FLAC__real d;
-;	unsigned sample, coeff;
-;	const unsigned limit = data_len - lag;
-;
-;	FLAC__ASSERT(lag > 0);
-;	FLAC__ASSERT(lag <= data_len);
-;
-;	for(coeff = 0; coeff < lag; coeff++)
-;		autoc[coeff] = 0.0;
-;	for(sample = 0; sample <= limit; sample++) {
-;		d = data[sample];
-;		for(coeff = 0; coeff < lag; coeff++)
-;			autoc[coeff] += d * data[sample+coeff];
-;	}
-;	for(; sample < data_len; sample++) {
-;		d = data[sample];
-;		for(coeff = 0; coeff < data_len - sample; coeff++)
-;			autoc[coeff] += d * data[sample+coeff];
-;	}
-; }
-;
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32
-	;[esp + 28] == autoc[]
-	;[esp + 24] == lag
-	;[esp + 20] == data_len
-	;[esp + 16] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 33)
-	;ASSERT(lag <= data_len)
-
-.begin:
-	push	esi
-	push	edi
-	push	ebx
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	mov	edi, [esp + 28]			; edi == autoc
-	mov	ecx, [esp + 24]			; ecx = # of dwords (=lag) of 0 to write
-	xor	eax, eax
-	rep	stosd
-
-	;	const unsigned limit = data_len - lag;
-	mov	eax, [esp + 24]			; eax == lag
-	mov	ecx, [esp + 20]
-	sub	ecx, eax			; ecx == limit
-
-	mov	edi, [esp + 28]			; edi == autoc
-	mov	esi, [esp + 16]			; esi == data
-	inc	ecx				; we are looping <= limit so we add one to the counter
-
-	;	for(sample = 0; sample <= limit; sample++) {
-	;		d = data[sample];
-	;		for(coeff = 0; coeff < lag; coeff++)
-	;			autoc[coeff] += d * data[sample+coeff];
-	;	}
-	fld	dword [esi]			; ST = d <- data[sample]
-	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
-	lea	edx, [eax + eax*2]
-	neg	edx
-	lea	edx, [eax + edx*4 + .jumper1_0 - .get_eip1]
-	call	.get_eip1
-.get_eip1:
-	pop	ebx
-	add	edx, ebx
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	cmp	eax, 33
-	jne	.loop1_start
-	sub	edx, byte 9			; compensate for the longer opcodes on the first iteration
-.loop1_start:
-	jmp	edx
-
-	fld	st0				; ST = d d
-	fmul	dword [esi + (32*4)]		; ST = d*data[sample+32] d		WATCHOUT: not a byte displacement here!
-	fadd	dword [edi + (32*4)]		; ST = autoc[32]+d*data[sample+32] d	WATCHOUT: not a byte displacement here!
-	fstp	dword [edi + (32*4)]		; autoc[32]+=d*data[sample+32]  ST = d	WATCHOUT: not a byte displacement here!
-	fld	st0				; ST = d d
-	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
-	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
-	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
-	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
-	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
-	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
-	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
-	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
-	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
-	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
-	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
-	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
-	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
-	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
-	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
-	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
-	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
-	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
-	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
-	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
-	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
-	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
-	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
-	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
-	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
-	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
-	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
-	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
-	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
-	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
-	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
-	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
-	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
-	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
-	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
-	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
-	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
-	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
-	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
-	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
-	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
-	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
-	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
-	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
-	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
-	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
-	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
-	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
-	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
-	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
-	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
-	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
-	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 5*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 5*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
-	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
-	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
-	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
-	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
-	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
-	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
-	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
-	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
-.jumper1_0:
-
-	fstp	st0				; pop d, ST = empty
-	add	esi, byte 4			; sample++
-	dec	ecx
-	jz	.loop1_end
-	fld	dword [esi]			; ST = d <- data[sample]
-	jmp	edx
-.loop1_end:
-
-	;	for(; sample < data_len; sample++) {
-	;		d = data[sample];
-	;		for(coeff = 0; coeff < data_len - sample; coeff++)
-	;			autoc[coeff] += d * data[sample+coeff];
-	;	}
-	mov	ecx, [esp + 24]			; ecx <- lag
-	dec	ecx				; ecx <- lag - 1
-	jz	near .end			; skip loop if 0 (i.e. lag == 1)
-
-	fld	dword [esi]			; ST = d <- data[sample]
-	mov	eax, ecx			; eax <- lag - 1 == data_len - sample the first time through
-	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
-	lea	edx, [eax + eax*2]
-	neg	edx
-	lea	edx, [eax + edx*4 + .jumper2_0 - .get_eip2]
-	call	.get_eip2
-.get_eip2:
-	pop	ebx
-	add	edx, ebx
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	jmp	edx
-
-	fld	st0				; ST = d d
-	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
-	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
-	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
-	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
-	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
-	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
-	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
-	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
-	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
-	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
-	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
-	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
-	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
-	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
-	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
-	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
-	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
-	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
-	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
-	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
-	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
-	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
-	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
-	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
-	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
-	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
-	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
-	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
-	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
-	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
-	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
-	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
-	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
-	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
-	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
-	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
-	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
-	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
-	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
-	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
-	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
-	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
-	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
-	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
-	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
-	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
-	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
-	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
-	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
-	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
-	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
-	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
-	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 5*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 5*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
-	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
-	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
-	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
-	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
-	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
-	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
-	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
-	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
-.jumper2_0:
-
-	fstp	st0				; pop d, ST = empty
-	add	esi, byte 4			; sample++
-	dec	ecx
-	jz	.loop2_end
-	add	edx, byte 11			; adjust our inner loop counter by adjusting the jump target
-	fld	dword [esi]			; ST = d <- data[sample]
-	jmp	edx
-.loop2_end:
-
-.end:
-	pop	ebx
-	pop	edi
-	pop	esi
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 4)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-.warmup:					; xmm2 == data[sample-3],data[sample-2],data[sample-1],data[sample]
-	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
-	addps	xmm5, xmm0			; xmm5 += xmm0 * xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	add	eax, 4
-	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
-	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	movss	xmm2, xmm0
-	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
-	addps	xmm5, xmm0			; xmm5 += xmm0 * xmm2
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5
-
-.end:
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 8)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-	xorps	xmm6, xmm6
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	movaps	xmm1, xmm0			; xmm1 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
-.warmup:					; xmm3:xmm2 == data[sample-7],data[sample-6],...,data[sample]
-	mulps	xmm0, xmm2
-	mulps	xmm1, xmm3			; xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
-	addps	xmm5, xmm0
-	addps	xmm6, xmm1			; xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	; here we reorder the instructions; see the (#) indexes for a logical order
-	shufps	xmm2, xmm2, 93h			; (3) 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	add	eax, 4				; (0)
-	shufps	xmm3, xmm3, 93h			; (4) 93h=2-1-0-3 => xmm3 gets rotated left by one float
-	shufps	xmm0, xmm0, 0			; (1) xmm0 = data[sample],data[sample],data[sample],data[sample]
-	movss	xmm3, xmm2			; (5)
-	movaps	xmm1, xmm0			; (2) xmm1 = data[sample],data[sample],data[sample],data[sample]
-	movss	xmm2, xmm0			; (6)
-	mulps	xmm1, xmm3			; (8)
-	mulps	xmm0, xmm2			; (7) xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
-	addps	xmm6, xmm1			; (10)
-	addps	xmm5, xmm0			; (9) xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5
-	movups	[edx + 16], xmm6
-
-.end:
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 12)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-	xorps	xmm6, xmm6
-	xorps	xmm7, xmm7
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
-	xorps	xmm4, xmm4			; xmm4 = 0,0,0,0
-.warmup:					; xmm3:xmm2 == data[sample-7],data[sample-6],...,data[sample]
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm2
-	addps	xmm5, xmm1
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm3
-	addps	xmm6, xmm1
-	mulps	xmm0, xmm4
-	addps	xmm7, xmm0			; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	add	eax, 4
-	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
-
-	; shift xmm4:xmm3:xmm2 left by one float
-	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	shufps	xmm3, xmm3, 93h			; 93h=2-1-0-3 => xmm3 gets rotated left by one float
-	shufps	xmm4, xmm4, 93h			; 93h=2-1-0-3 => xmm4 gets rotated left by one float
-	movss	xmm4, xmm3
-	movss	xmm3, xmm2
-	movss	xmm2, xmm0
-
-	; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm3:xmm3:xmm2
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm2
-	addps	xmm5, xmm1
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm3
-	addps	xmm6, xmm1
-	mulps	xmm0, xmm4
-	addps	xmm7, xmm0
-
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5
-	movups	[edx + 16], xmm6
-	movups	[edx + 32], xmm7
-
-.end:
-	ret
-
-	align 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow
-	;[ebp + 32] autoc
-	;[ebp + 28] lag
-	;[ebp + 24] data_len
-	;[ebp + 20] data
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-	mov	ebp, esp
-
-	mov	esi, [ebp + 20]
-	mov	edi, [ebp + 24]
-	mov	edx, [ebp + 28]
-	inc	edx
-	and	edx, byte -2
-	mov	eax, edx
-	neg	eax
-	and	esp, byte -8
-	lea	esp, [esp + 4 * eax]
-	mov	ecx, edx
-	xor	eax, eax
-.loop0:
-	dec	ecx
-	mov	[esp + 4 * ecx], eax
-	jnz	short .loop0
-
-	mov	eax, edi
-	sub	eax, edx
-	mov	ebx, edx
-	and	ebx, byte 1
-	sub	eax, ebx
-	lea	ecx, [esi + 4 * eax - 12]
-	cmp	esi, ecx
-	mov	eax, esi
-	ja	short .loop2_pre
-	align	16		;4 nops
-.loop1_i:
-	movd	mm0, [eax]
-	movd	mm2, [eax + 4]
-	movd	mm4, [eax + 8]
-	movd	mm6, [eax + 12]
-	mov	ebx, edx
-	punpckldq	mm0, mm0
-	punpckldq	mm2, mm2
-	punpckldq	mm4, mm4
-	punpckldq	mm6, mm6
-	align	16		;3 nops
-.loop1_j:
-	sub	ebx, byte 2
-	movd	mm1, [eax + 4 * ebx]
-	movd	mm3, [eax + 4 * ebx + 4]
-	movd	mm5, [eax + 4 * ebx + 8]
-	movd	mm7, [eax + 4 * ebx + 12]
-	punpckldq	mm1, mm3
-	punpckldq	mm3, mm5
-	pfmul	mm1, mm0
-	punpckldq	mm5, mm7
-	pfmul	mm3, mm2
-	punpckldq	mm7, [eax + 4 * ebx + 16]
-	pfmul	mm5, mm4
-	pfmul	mm7, mm6
-	pfadd	mm1, mm3
-	movq	mm3, [esp + 4 * ebx]
-	pfadd	mm5, mm7
-	pfadd	mm1, mm5
-	pfadd	mm3, mm1
-	movq	[esp + 4 * ebx], mm3
-	jg	short .loop1_j
-
-	add	eax, byte 16
-	cmp	eax, ecx
-	jb	short .loop1_i
-
-.loop2_pre:
-	mov	ebx, eax
-	sub	eax, esi
-	shr	eax, 2
-	lea	ecx, [esi + 4 * edi]
-	mov	esi, ebx
-.loop2_i:
-	movd	mm0, [esi]
-	mov	ebx, edi
-	sub	ebx, eax
-	cmp	ebx, edx
-	jbe	short .loop2_j
-	mov	ebx, edx
-.loop2_j:
-	dec	ebx
-	movd	mm1, [esi + 4 * ebx]
-	pfmul	mm1, mm0
-	movd	mm2, [esp + 4 * ebx]
-	pfadd	mm1, mm2
-	movd	[esp + 4 * ebx], mm1
-
-	jnz	short .loop2_j
-
-	add	esi, byte 4
-	inc	eax
-	cmp	esi, ecx
-	jnz	short .loop2_i
-
-	mov	edi, [ebp + 32]
-	mov	edx, [ebp + 28]
-.loop3:
-	dec	edx
-	mov	eax, [esp + 4 * edx]
-	mov	[edi + 4 * edx], eax
-	jnz	short .loop3
-
-	femms
-
-	mov	esp, ebp
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
-;
-;	for(i = 0; i < data_len; i++) {
-;		sum = 0;
-;		for(j = 0; j < order; j++)
-;			sum += qlp_coeff[j] * data[i-j-1];
-;		residual[i] = data[i] - (sum >> lp_quantization);
-;	}
-;
-	ALIGN	16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
-	;[esp + 40]	residual[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	data[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = data[]
-	mov	edi, [esp + 40]			; edi = residual[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-.begin:
-	cmp	eax, byte 1
-	jg	short .i_1more
-
-	mov	ecx, [esp + 28]
-	mov	edx, [ecx]			; edx = qlp_coeff[0]
-	mov	eax, [esi - 4]			; eax = data[-1]
-	mov	cl, [esp + 36]			; cl = lp_quantization
-	ALIGN	16
-.i_1_loop_i:
-	imul	eax, edx
-	sar	eax, cl
-	neg	eax
-	add	eax, [esi]
-	mov	[edi], eax
-	mov	eax, [esi]
-	add	edi, byte 4
-	add	esi, byte 4
-	dec	ebx
-	jnz	.i_1_loop_i
-
-	jmp	.end
-
-.i_1more:
-	cmp	eax, byte 32			; for order <= 32 there is a faster routine
-	jbe	short .i_32
-
-	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
-	ALIGN 16
-.i_32more_loop_i:
-	xor	ebp, ebp
-	mov	ecx, [esp + 32]
-	mov	edx, ecx
-	shl	edx, 2
-	add	edx, [esp + 28]
-	neg	ecx
-	ALIGN	16
-.i_32more_loop_j:
-	sub	edx, byte 4
-	mov	eax, [edx]
-	imul	eax, [esi + 4 * ecx]
-	add	ebp, eax
-	inc	ecx
-	jnz	short .i_32more_loop_j
-
-	mov	cl, [esp + 36]
-	sar	ebp, cl
-	neg	ebp
-	add	ebp, [esi]
-	mov	[edi], ebp
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	.i_32more_loop_i
-
-	jmp	.end
-
-.i_32:
-	sub	edi, esi
-	neg	eax
-	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.get_eip0
-.get_eip0:
-	pop	eax
-	add	edx, eax
-	inc	edx
-	mov	eax, [esp + 28]			; eax = qlp_coeff[]
-	xor	ebp, ebp
-	jmp	edx
-
-	mov	ecx, [eax + 124]
-	imul	ecx, [esi - 128]
-	add	ebp, ecx
-	mov	ecx, [eax + 120]
-	imul	ecx, [esi - 124]
-	add	ebp, ecx
-	mov	ecx, [eax + 116]
-	imul	ecx, [esi - 120]
-	add	ebp, ecx
-	mov	ecx, [eax + 112]
-	imul	ecx, [esi - 116]
-	add	ebp, ecx
-	mov	ecx, [eax + 108]
-	imul	ecx, [esi - 112]
-	add	ebp, ecx
-	mov	ecx, [eax + 104]
-	imul	ecx, [esi - 108]
-	add	ebp, ecx
-	mov	ecx, [eax + 100]
-	imul	ecx, [esi - 104]
-	add	ebp, ecx
-	mov	ecx, [eax + 96]
-	imul	ecx, [esi - 100]
-	add	ebp, ecx
-	mov	ecx, [eax + 92]
-	imul	ecx, [esi - 96]
-	add	ebp, ecx
-	mov	ecx, [eax + 88]
-	imul	ecx, [esi - 92]
-	add	ebp, ecx
-	mov	ecx, [eax + 84]
-	imul	ecx, [esi - 88]
-	add	ebp, ecx
-	mov	ecx, [eax + 80]
-	imul	ecx, [esi - 84]
-	add	ebp, ecx
-	mov	ecx, [eax + 76]
-	imul	ecx, [esi - 80]
-	add	ebp, ecx
-	mov	ecx, [eax + 72]
-	imul	ecx, [esi - 76]
-	add	ebp, ecx
-	mov	ecx, [eax + 68]
-	imul	ecx, [esi - 72]
-	add	ebp, ecx
-	mov	ecx, [eax + 64]
-	imul	ecx, [esi - 68]
-	add	ebp, ecx
-	mov	ecx, [eax + 60]
-	imul	ecx, [esi - 64]
-	add	ebp, ecx
-	mov	ecx, [eax + 56]
-	imul	ecx, [esi - 60]
-	add	ebp, ecx
-	mov	ecx, [eax + 52]
-	imul	ecx, [esi - 56]
-	add	ebp, ecx
-	mov	ecx, [eax + 48]
-	imul	ecx, [esi - 52]
-	add	ebp, ecx
-	mov	ecx, [eax + 44]
-	imul	ecx, [esi - 48]
-	add	ebp, ecx
-	mov	ecx, [eax + 40]
-	imul	ecx, [esi - 44]
-	add	ebp, ecx
-	mov	ecx, [eax + 36]
-	imul	ecx, [esi - 40]
-	add	ebp, ecx
-	mov	ecx, [eax + 32]
-	imul	ecx, [esi - 36]
-	add	ebp, ecx
-	mov	ecx, [eax + 28]
-	imul	ecx, [esi - 32]
-	add	ebp, ecx
-	mov	ecx, [eax + 24]
-	imul	ecx, [esi - 28]
-	add	ebp, ecx
-	mov	ecx, [eax + 20]
-	imul	ecx, [esi - 24]
-	add	ebp, ecx
-	mov	ecx, [eax + 16]
-	imul	ecx, [esi - 20]
-	add	ebp, ecx
-	mov	ecx, [eax + 12]
-	imul	ecx, [esi - 16]
-	add	ebp, ecx
-	mov	ecx, [eax + 8]
-	imul	ecx, [esi - 12]
-	add	ebp, ecx
-	mov	ecx, [eax + 4]
-	imul	ecx, [esi - 8]
-	add	ebp, ecx
-	mov	ecx, [eax]			; there is one byte missing
-	imul	ecx, [esi - 4]
-	add	ebp, ecx
-.jumper_0:
-
-	mov	cl, [esp + 36]
-	sar	ebp, cl
-	neg	ebp
-	add	ebp, [esi]
-	mov	[edi + esi], ebp
-	add	esi, byte 4
-
-	dec	ebx
-	jz	short .end
-	xor	ebp, ebp
-	jmp	edx
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel must be <= 16.  Especially note that this routine cannot be used
-; for side-channel coded 16bps channels since the effective bps is 17.
-	ALIGN	16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
-	;[esp + 40]	residual[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	data[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = data[]
-	mov	edi, [esp + 40]			; edi = residual[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-	dec	ebx
-	test	ebx, ebx
-	jz	near .last_one
-
-	mov	edx, [esp + 28]			; edx = qlp_coeff[]
-	movd	mm6, [esp + 36]			; mm6 = 0:lp_quantization
-	mov	ebp, esp
-
-	and	esp, 0xfffffff8
-
-	xor	ecx, ecx
-.copy_qlp_loop:
-	push	word [edx + 4 * ecx]
-	inc	ecx
-	cmp	ecx, eax
-	jnz	short .copy_qlp_loop
-
-	and	ecx, 0x3
-	test	ecx, ecx
-	je	short .za_end
-	sub	ecx, byte 4
-.za_loop:
-	push	word 0
-	inc	eax
-	inc	ecx
-	jnz	short .za_loop
-.za_end:
-
-	movq	mm5, [esp + 2 * eax - 8]
-	movd	mm4, [esi - 16]
-	punpckldq	mm4, [esi - 12]
-	movd	mm0, [esi - 8]
-	punpckldq	mm0, [esi - 4]
-	packssdw	mm4, mm0
-
-	cmp	eax, byte 4
-	jnbe	short .mmx_4more
-
-	align	16
-.mmx_4_loop_i:
-	movd	mm1, [esi]
-	movq	mm3, mm4
-	punpckldq	mm1, [esi + 4]
-	psrlq	mm4, 16
-	movq	mm0, mm1
-	psllq	mm0, 48
-	por	mm4, mm0
-	movq	mm2, mm4
-	psrlq	mm4, 16
-	pxor	mm0, mm0
-	punpckhdq	mm0, mm1
-	pmaddwd	mm3, mm5
-	pmaddwd	mm2, mm5
-	psllq	mm0, 16
-	por	mm4, mm0
-	movq	mm0, mm3
-	punpckldq	mm3, mm2
-	punpckhdq	mm0, mm2
-	paddd	mm3, mm0
-	psrad	mm3, mm6
-	psubd	mm1, mm3
-	movd	[edi], mm1
-	punpckhdq	mm1, mm1
-	movd	[edi + 4], mm1
-
-	add	edi, byte 8
-	add	esi, byte 8
-
-	sub	ebx, 2
-	jg	.mmx_4_loop_i
-	jmp	.mmx_end
-
-.mmx_4more:
-	shl	eax, 2
-	neg	eax
-	add	eax, byte 16
-
-	align	16
-.mmx_4more_loop_i:
-	movd	mm1, [esi]
-	punpckldq	mm1, [esi + 4]
-	movq	mm3, mm4
-	psrlq	mm4, 16
-	movq	mm0, mm1
-	psllq	mm0, 48
-	por	mm4, mm0
-	movq	mm2, mm4
-	psrlq	mm4, 16
-	pxor	mm0, mm0
-	punpckhdq	mm0, mm1
-	pmaddwd	mm3, mm5
-	pmaddwd	mm2, mm5
-	psllq	mm0, 16
-	por	mm4, mm0
-
-	mov	ecx, esi
-	add	ecx, eax
-	mov	edx, esp
-
-	align	16
-.mmx_4more_loop_j:
-	movd	mm0, [ecx - 16]
-	movd	mm7, [ecx - 8]
-	punpckldq	mm0, [ecx - 12]
-	punpckldq	mm7, [ecx - 4]
-	packssdw	mm0, mm7
-	pmaddwd	mm0, [edx]
-	punpckhdq	mm7, mm7
-	paddd	mm3, mm0
-	movd	mm0, [ecx - 12]
-	punpckldq	mm0, [ecx - 8]
-	punpckldq	mm7, [ecx]
-	packssdw	mm0, mm7
-	pmaddwd	mm0, [edx]
-	paddd	mm2, mm0
-
-	add	edx, byte 8
-	add	ecx, byte 16
-	cmp	ecx, esi
-	jnz	.mmx_4more_loop_j
-
-	movq	mm0, mm3
-	punpckldq	mm3, mm2
-	punpckhdq	mm0, mm2
-	paddd	mm3, mm0
-	psrad	mm3, mm6
-	psubd	mm1, mm3
-	movd	[edi], mm1
-	punpckhdq	mm1, mm1
-	movd	[edi + 4], mm1
-
-	add	edi, byte 8
-	add	esi, byte 8
-
-	sub	ebx, 2
-	jg	near .mmx_4more_loop_i
-
-.mmx_end:
-	emms
-	mov	esp, ebp
-.last_one:
-	mov	eax, [esp + 32]
-	inc	ebx
-	jnz	near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; **********************************************************************
-;
-; void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-; {
-; 	unsigned i, j;
-; 	FLAC__int32 sum;
-;
-; 	FLAC__ASSERT(order > 0);
-;
-; 	for(i = 0; i < data_len; i++) {
-; 		sum = 0;
-; 		for(j = 0; j < order; j++)
-; 			sum += qlp_coeff[j] * data[i-j-1];
-; 		data[i] = residual[i] + (sum >> lp_quantization);
-; 	}
-; }
-	ALIGN	16
-cident FLAC__lpc_restore_signal_asm_ia32
-	;[esp + 40]	data[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	residual[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = residual[]
-	mov	edi, [esp + 40]			; edi = data[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-
-.begin:
-	cmp	eax, byte 1
-	jg	short .x87_1more
-
-	mov	ecx, [esp + 28]
-	mov	edx, [ecx]
-	mov	eax, [edi - 4]
-	mov	cl, [esp + 36]
-	ALIGN	16
-.x87_1_loop_i:
-	imul	eax, edx
-	sar	eax, cl
-	add	eax, [esi]
-	mov	[edi], eax
-	add	esi, byte 4
-	add	edi, byte 4
-	dec	ebx
-	jnz	.x87_1_loop_i
-
-	jmp	.end
-
-.x87_1more:
-	cmp	eax, byte 32			; for order <= 32 there is a faster routine
-	jbe	short .x87_32
-
-	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
-	ALIGN 16
-.x87_32more_loop_i:
-	xor	ebp, ebp
-	mov	ecx, [esp + 32]
-	mov	edx, ecx
-	shl	edx, 2
-	add	edx, [esp + 28]
-	neg	ecx
-	ALIGN	16
-.x87_32more_loop_j:
-	sub	edx, byte 4
-	mov	eax, [edx]
-	imul	eax, [edi + 4 * ecx]
-	add	ebp, eax
-	inc	ecx
-	jnz	short .x87_32more_loop_j
-
-	mov	cl, [esp + 36]
-	sar	ebp, cl
-	add	ebp, [esi]
-	mov	[edi], ebp
-	add	edi, byte 4
-	add	esi, byte 4
-
-	dec	ebx
-	jnz	.x87_32more_loop_i
-
-	jmp	.end
-
-.x87_32:
-	sub	esi, edi
-	neg	eax
-	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.get_eip0
-.get_eip0:
-	pop	eax
-	add	edx, eax
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	mov	eax, [esp + 28]			; eax = qlp_coeff[]
-	xor	ebp, ebp
-	jmp	edx
-
-	mov	ecx, [eax + 124]		; ecx =  qlp_coeff[31]
-	imul	ecx, [edi - 128]		; ecx =  qlp_coeff[31] * data[i-32]
-	add	ebp, ecx			; sum += qlp_coeff[31] * data[i-32]
-	mov	ecx, [eax + 120]		; ecx =  qlp_coeff[30]
-	imul	ecx, [edi - 124]		; ecx =  qlp_coeff[30] * data[i-31]
-	add	ebp, ecx			; sum += qlp_coeff[30] * data[i-31]
-	mov	ecx, [eax + 116]		; ecx =  qlp_coeff[29]
-	imul	ecx, [edi - 120]		; ecx =  qlp_coeff[29] * data[i-30]
-	add	ebp, ecx			; sum += qlp_coeff[29] * data[i-30]
-	mov	ecx, [eax + 112]		; ecx =  qlp_coeff[28]
-	imul	ecx, [edi - 116]		; ecx =  qlp_coeff[28] * data[i-29]
-	add	ebp, ecx			; sum += qlp_coeff[28] * data[i-29]
-	mov	ecx, [eax + 108]		; ecx =  qlp_coeff[27]
-	imul	ecx, [edi - 112]		; ecx =  qlp_coeff[27] * data[i-28]
-	add	ebp, ecx			; sum += qlp_coeff[27] * data[i-28]
-	mov	ecx, [eax + 104]		; ecx =  qlp_coeff[26]
-	imul	ecx, [edi - 108]		; ecx =  qlp_coeff[26] * data[i-27]
-	add	ebp, ecx			; sum += qlp_coeff[26] * data[i-27]
-	mov	ecx, [eax + 100]		; ecx =  qlp_coeff[25]
-	imul	ecx, [edi - 104]		; ecx =  qlp_coeff[25] * data[i-26]
-	add	ebp, ecx			; sum += qlp_coeff[25] * data[i-26]
-	mov	ecx, [eax + 96]			; ecx =  qlp_coeff[24]
-	imul	ecx, [edi - 100]		; ecx =  qlp_coeff[24] * data[i-25]
-	add	ebp, ecx			; sum += qlp_coeff[24] * data[i-25]
-	mov	ecx, [eax + 92]			; ecx =  qlp_coeff[23]
-	imul	ecx, [edi - 96]			; ecx =  qlp_coeff[23] * data[i-24]
-	add	ebp, ecx			; sum += qlp_coeff[23] * data[i-24]
-	mov	ecx, [eax + 88]			; ecx =  qlp_coeff[22]
-	imul	ecx, [edi - 92]			; ecx =  qlp_coeff[22] * data[i-23]
-	add	ebp, ecx			; sum += qlp_coeff[22] * data[i-23]
-	mov	ecx, [eax + 84]			; ecx =  qlp_coeff[21]
-	imul	ecx, [edi - 88]			; ecx =  qlp_coeff[21] * data[i-22]
-	add	ebp, ecx			; sum += qlp_coeff[21] * data[i-22]
-	mov	ecx, [eax + 80]			; ecx =  qlp_coeff[20]
-	imul	ecx, [edi - 84]			; ecx =  qlp_coeff[20] * data[i-21]
-	add	ebp, ecx			; sum += qlp_coeff[20] * data[i-21]
-	mov	ecx, [eax + 76]			; ecx =  qlp_coeff[19]
-	imul	ecx, [edi - 80]			; ecx =  qlp_coeff[19] * data[i-20]
-	add	ebp, ecx			; sum += qlp_coeff[19] * data[i-20]
-	mov	ecx, [eax + 72]			; ecx =  qlp_coeff[18]
-	imul	ecx, [edi - 76]			; ecx =  qlp_coeff[18] * data[i-19]
-	add	ebp, ecx			; sum += qlp_coeff[18] * data[i-19]
-	mov	ecx, [eax + 68]			; ecx =  qlp_coeff[17]
-	imul	ecx, [edi - 72]			; ecx =  qlp_coeff[17] * data[i-18]
-	add	ebp, ecx			; sum += qlp_coeff[17] * data[i-18]
-	mov	ecx, [eax + 64]			; ecx =  qlp_coeff[16]
-	imul	ecx, [edi - 68]			; ecx =  qlp_coeff[16] * data[i-17]
-	add	ebp, ecx			; sum += qlp_coeff[16] * data[i-17]
-	mov	ecx, [eax + 60]			; ecx =  qlp_coeff[15]
-	imul	ecx, [edi - 64]			; ecx =  qlp_coeff[15] * data[i-16]
-	add	ebp, ecx			; sum += qlp_coeff[15] * data[i-16]
-	mov	ecx, [eax + 56]			; ecx =  qlp_coeff[14]
-	imul	ecx, [edi - 60]			; ecx =  qlp_coeff[14] * data[i-15]
-	add	ebp, ecx			; sum += qlp_coeff[14] * data[i-15]
-	mov	ecx, [eax + 52]			; ecx =  qlp_coeff[13]
-	imul	ecx, [edi - 56]			; ecx =  qlp_coeff[13] * data[i-14]
-	add	ebp, ecx			; sum += qlp_coeff[13] * data[i-14]
-	mov	ecx, [eax + 48]			; ecx =  qlp_coeff[12]
-	imul	ecx, [edi - 52]			; ecx =  qlp_coeff[12] * data[i-13]
-	add	ebp, ecx			; sum += qlp_coeff[12] * data[i-13]
-	mov	ecx, [eax + 44]			; ecx =  qlp_coeff[11]
-	imul	ecx, [edi - 48]			; ecx =  qlp_coeff[11] * data[i-12]
-	add	ebp, ecx			; sum += qlp_coeff[11] * data[i-12]
-	mov	ecx, [eax + 40]			; ecx =  qlp_coeff[10]
-	imul	ecx, [edi - 44]			; ecx =  qlp_coeff[10] * data[i-11]
-	add	ebp, ecx			; sum += qlp_coeff[10] * data[i-11]
-	mov	ecx, [eax + 36]			; ecx =  qlp_coeff[ 9]
-	imul	ecx, [edi - 40]			; ecx =  qlp_coeff[ 9] * data[i-10]
-	add	ebp, ecx			; sum += qlp_coeff[ 9] * data[i-10]
-	mov	ecx, [eax + 32]			; ecx =  qlp_coeff[ 8]
-	imul	ecx, [edi - 36]			; ecx =  qlp_coeff[ 8] * data[i- 9]
-	add	ebp, ecx			; sum += qlp_coeff[ 8] * data[i- 9]
-	mov	ecx, [eax + 28]			; ecx =  qlp_coeff[ 7]
-	imul	ecx, [edi - 32]			; ecx =  qlp_coeff[ 7] * data[i- 8]
-	add	ebp, ecx			; sum += qlp_coeff[ 7] * data[i- 8]
-	mov	ecx, [eax + 24]			; ecx =  qlp_coeff[ 6]
-	imul	ecx, [edi - 28]			; ecx =  qlp_coeff[ 6] * data[i- 7]
-	add	ebp, ecx			; sum += qlp_coeff[ 6] * data[i- 7]
-	mov	ecx, [eax + 20]			; ecx =  qlp_coeff[ 5]
-	imul	ecx, [edi - 24]			; ecx =  qlp_coeff[ 5] * data[i- 6]
-	add	ebp, ecx			; sum += qlp_coeff[ 5] * data[i- 6]
-	mov	ecx, [eax + 16]			; ecx =  qlp_coeff[ 4]
-	imul	ecx, [edi - 20]			; ecx =  qlp_coeff[ 4] * data[i- 5]
-	add	ebp, ecx			; sum += qlp_coeff[ 4] * data[i- 5]
-	mov	ecx, [eax + 12]			; ecx =  qlp_coeff[ 3]
-	imul	ecx, [edi - 16]			; ecx =  qlp_coeff[ 3] * data[i- 4]
-	add	ebp, ecx			; sum += qlp_coeff[ 3] * data[i- 4]
-	mov	ecx, [eax + 8]			; ecx =  qlp_coeff[ 2]
-	imul	ecx, [edi - 12]			; ecx =  qlp_coeff[ 2] * data[i- 3]
-	add	ebp, ecx			; sum += qlp_coeff[ 2] * data[i- 3]
-	mov	ecx, [eax + 4]			; ecx =  qlp_coeff[ 1]
-	imul	ecx, [edi - 8]			; ecx =  qlp_coeff[ 1] * data[i- 2]
-	add	ebp, ecx			; sum += qlp_coeff[ 1] * data[i- 2]
-	mov	ecx, [eax]			; ecx =  qlp_coeff[ 0] (NOTE: one byte missing from instruction)
-	imul	ecx, [edi - 4]			; ecx =  qlp_coeff[ 0] * data[i- 1]
-	add	ebp, ecx			; sum += qlp_coeff[ 0] * data[i- 1]
-.jumper_0:
-
-	mov	cl, [esp + 36]
-	sar	ebp, cl				; ebp = (sum >> lp_quantization)
-	add	ebp, [esi + edi]		; ebp = residual[i] + (sum >> lp_quantization)
-	mov	[edi], ebp			; data[i] = residual[i] + (sum >> lp_quantization)
-	add	edi, byte 4
-
-	dec	ebx
-	jz	short .end
-	xor	ebp, ebp
-	jmp	edx
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel must be <= 16.  Especially note that this routine cannot be used
-; for side-channel coded 16bps channels since the effective bps is 17.
-; WATCHOUT: this routine requires that each data array have a buffer of up to
-; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
-; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
-	ALIGN	16
-cident FLAC__lpc_restore_signal_asm_ia32_mmx
-	;[esp + 40]	data[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	residual[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]
-	mov	edi, [esp + 40]
-	mov	eax, [esp + 32]
-	mov	ebx, [esp + 24]
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-	cmp	eax, byte 4
-	jb	near FLAC__lpc_restore_signal_asm_ia32.begin
-
-	mov	edx, [esp + 28]
-	movd	mm6, [esp + 36]
-	mov	ebp, esp
-
-	and	esp, 0xfffffff8
-
-	xor	ecx, ecx
-.copy_qlp_loop:
-	push	word [edx + 4 * ecx]
-	inc	ecx
-	cmp	ecx, eax
-	jnz	short .copy_qlp_loop
-
-	and	ecx, 0x3
-	test	ecx, ecx
-	je	short .za_end
-	sub	ecx, byte 4
-.za_loop:
-	push	word 0
-	inc	eax
-	inc	ecx
-	jnz	short .za_loop
-.za_end:
-
-	movq	mm5, [esp + 2 * eax - 8]
-	movd	mm4, [edi - 16]
-	punpckldq	mm4, [edi - 12]
-	movd	mm0, [edi - 8]
-	punpckldq	mm0, [edi - 4]
-	packssdw	mm4, mm0
-
-	cmp	eax, byte 4
-	jnbe	short .mmx_4more
-
-	align	16
-.mmx_4_loop_i:
-	movq	mm7, mm4
-	pmaddwd	mm7, mm5
-	movq	mm0, mm7
-	punpckhdq	mm7, mm7
-	paddd	mm7, mm0
-	psrad	mm7, mm6
-	movd	mm1, [esi]
-	paddd	mm7, mm1
-	movd	[edi], mm7
-	psllq	mm7, 48
-	psrlq	mm4, 16
-	por	mm4, mm7
-
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	.mmx_4_loop_i
-	jmp	.mmx_end
-.mmx_4more:
-	shl	eax, 2
-	neg	eax
-	add	eax, byte 16
-	align	16
-.mmx_4more_loop_i:
-	mov	ecx, edi
-	add	ecx, eax
-	mov	edx, esp
-
-	movq	mm7, mm4
-	pmaddwd	mm7, mm5
-
-	align	16
-.mmx_4more_loop_j:
-	movd	mm0, [ecx - 16]
-	punpckldq	mm0, [ecx - 12]
-	movd	mm1, [ecx - 8]
-	punpckldq	mm1, [ecx - 4]
-	packssdw	mm0, mm1
-	pmaddwd	mm0, [edx]
-	paddd	mm7, mm0
-
-	add	edx, byte 8
-	add	ecx, byte 16
-	cmp	ecx, edi
-	jnz	.mmx_4more_loop_j
-
-	movq	mm0, mm7
-	punpckhdq	mm7, mm7
-	paddd	mm7, mm0
-	psrad	mm7, mm6
-	movd	mm1, [esi]
-	paddd	mm7, mm1
-	movd	[edi], mm7
-	psllq	mm7, 48
-	psrlq	mm4, 16
-	por	mm4, mm7
-
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	short .mmx_4more_loop_i
-.mmx_end:
-	emms
-	mov	esp, ebp
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-end
+;  vim:filetype=nasm ts=8
+
+;  libFLAC - Free Lossless Audio Codec library
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;
+;  - Redistributions of source code must retain the above copyright
+;  notice, this list of conditions and the following disclaimer.
+;
+;  - Redistributions in binary form must reproduce the above copyright
+;  notice, this list of conditions and the following disclaimer in the
+;  documentation and/or other materials provided with the distribution.
+;
+;  - Neither the name of the Xiph.org Foundation nor the names of its
+;  contributors may be used to endorse or promote products derived from
+;  this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "ia32/nasm.h"
+
+	data_section
+
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
+cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow
+cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
+cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
+cglobal FLAC__lpc_restore_signal_asm_ia32
+cglobal FLAC__lpc_restore_signal_asm_ia32_mmx
+
+	code_section
+
+; **********************************************************************
+;
+; void FLAC__lpc_compute_autocorrelation_asm(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
+; {
+;	FLAC__real d;
+;	unsigned sample, coeff;
+;	const unsigned limit = data_len - lag;
+;
+;	FLAC__ASSERT(lag > 0);
+;	FLAC__ASSERT(lag <= data_len);
+;
+;	for(coeff = 0; coeff < lag; coeff++)
+;		autoc[coeff] = 0.0;
+;	for(sample = 0; sample <= limit; sample++) {
+;		d = data[sample];
+;		for(coeff = 0; coeff < lag; coeff++)
+;			autoc[coeff] += d * data[sample+coeff];
+;	}
+;	for(; sample < data_len; sample++) {
+;		d = data[sample];
+;		for(coeff = 0; coeff < data_len - sample; coeff++)
+;			autoc[coeff] += d * data[sample+coeff];
+;	}
+; }
+;
+	ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32
+	;[esp + 28] == autoc[]
+	;[esp + 24] == lag
+	;[esp + 20] == data_len
+	;[esp + 16] == data[]
+
+	;ASSERT(lag > 0)
+	;ASSERT(lag <= 33)
+	;ASSERT(lag <= data_len)
+
+.begin:
+	push	esi
+	push	edi
+	push	ebx
+
+	;	for(coeff = 0; coeff < lag; coeff++)
+	;		autoc[coeff] = 0.0;
+	mov	edi, [esp + 28]			; edi == autoc
+	mov	ecx, [esp + 24]			; ecx = # of dwords (=lag) of 0 to write
+	xor	eax, eax
+	rep	stosd
+
+	;	const unsigned limit = data_len - lag;
+	mov	eax, [esp + 24]			; eax == lag
+	mov	ecx, [esp + 20]
+	sub	ecx, eax			; ecx == limit
+
+	mov	edi, [esp + 28]			; edi == autoc
+	mov	esi, [esp + 16]			; esi == data
+	inc	ecx				; we are looping <= limit so we add one to the counter
+
+	;	for(sample = 0; sample <= limit; sample++) {
+	;		d = data[sample];
+	;		for(coeff = 0; coeff < lag; coeff++)
+	;			autoc[coeff] += d * data[sample+coeff];
+	;	}
+	fld	dword [esi]			; ST = d <- data[sample]
+	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
+	lea	edx, [eax + eax*2]
+	neg	edx
+	lea	edx, [eax + edx*4 + .jumper1_0 - .get_eip1]
+	call	.get_eip1
+.get_eip1:
+	pop	ebx
+	add	edx, ebx
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	cmp	eax, 33
+	jne	.loop1_start
+	sub	edx, byte 9			; compensate for the longer opcodes on the first iteration
+.loop1_start:
+	jmp	edx
+
+	fld	st0				; ST = d d
+	fmul	dword [esi + (32*4)]		; ST = d*data[sample+32] d		WATCHOUT: not a byte displacement here!
+	fadd	dword [edi + (32*4)]		; ST = autoc[32]+d*data[sample+32] d	WATCHOUT: not a byte displacement here!
+	fstp	dword [edi + (32*4)]		; autoc[32]+=d*data[sample+32]  ST = d	WATCHOUT: not a byte displacement here!
+	fld	st0				; ST = d d
+	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
+	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
+	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
+	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
+	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
+	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
+	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
+	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
+	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
+	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
+	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
+	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
+	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
+	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
+	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
+	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
+	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
+	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
+	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
+	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
+	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
+	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
+	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
+	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
+	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
+	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
+	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
+	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
+	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
+	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
+	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
+	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
+	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
+	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
+	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
+	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
+	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
+	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
+	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
+	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
+	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
+	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
+	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
+	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
+	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
+	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
+	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
+	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
+	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
+	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
+	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
+	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
+	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+4] d
+	fadd	dword [edi + ( 5*4)]		; ST = autoc[4]+d*data[sample+4] d
+	fstp	dword [edi + ( 5*4)]		; autoc[4]+=d*data[sample+4]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
+	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
+	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
+	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
+	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
+	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
+	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
+	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
+	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
+	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
+	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
+.jumper1_0:
+
+	fstp	st0				; pop d, ST = empty
+	add	esi, byte 4			; sample++
+	dec	ecx
+	jz	.loop1_end
+	fld	dword [esi]			; ST = d <- data[sample]
+	jmp	edx
+.loop1_end:
+
+	;	for(; sample < data_len; sample++) {
+	;		d = data[sample];
+	;		for(coeff = 0; coeff < data_len - sample; coeff++)
+	;			autoc[coeff] += d * data[sample+coeff];
+	;	}
+	mov	ecx, [esp + 24]			; ecx <- lag
+	dec	ecx				; ecx <- lag - 1
+	jz	near .end			; skip loop if 0 (i.e. lag == 1)
+
+	fld	dword [esi]			; ST = d <- data[sample]
+	mov	eax, ecx			; eax <- lag - 1 == data_len - sample the first time through
+	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
+	lea	edx, [eax + eax*2]
+	neg	edx
+	lea	edx, [eax + edx*4 + .jumper2_0 - .get_eip2]
+	call	.get_eip2
+.get_eip2:
+	pop	ebx
+	add	edx, ebx
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	jmp	edx
+
+	fld	st0				; ST = d d
+	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
+	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
+	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
+	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
+	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
+	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
+	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
+	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
+	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
+	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
+	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
+	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
+	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
+	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
+	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
+	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
+	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
+	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
+	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
+	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
+	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
+	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
+	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
+	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
+	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
+	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
+	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
+	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
+	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
+	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
+	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
+	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
+	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
+	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
+	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
+	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
+	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
+	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
+	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
+	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
+	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
+	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
+	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
+	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
+	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
+	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
+	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
+	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
+	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
+	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
+	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
+	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
+	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+4] d
+	fadd	dword [edi + ( 5*4)]		; ST = autoc[4]+d*data[sample+4] d
+	fstp	dword [edi + ( 5*4)]		; autoc[4]+=d*data[sample+4]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
+	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
+	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
+	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
+	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
+	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
+	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
+	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
+	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
+	fld	st0				; ST = d d
+	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
+	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
+	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
+.jumper2_0:
+
+	fstp	st0				; pop d, ST = empty
+	add	esi, byte 4			; sample++
+	dec	ecx
+	jz	.loop2_end
+	add	edx, byte 11			; adjust our inner loop counter by adjusting the jump target
+	fld	dword [esi]			; ST = d <- data[sample]
+	jmp	edx
+.loop2_end:
+
+.end:
+	pop	ebx
+	pop	edi
+	pop	esi
+	ret
+
+	ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
+	;[esp + 16] == autoc[]   (output; 4 floats are always stored, see .loop_end)
+	;[esp + 12] == lag       (never read below; all 4 lanes are always computed)
+	;[esp + 8] == data_len
+	;[esp + 4] == data[]
+	; Accumulates autoc[k] += data[sample] * data[sample-k] for k = 0..3 over all samples, one SSE lane per k.
+	;ASSERT(lag > 0)
+	;ASSERT(lag <= 4)
+	;ASSERT(lag <= data_len)
+
+	;	for(coeff = 0; coeff < lag; coeff++)
+	;		autoc[coeff] = 0.0;
+	xorps	xmm5, xmm5			; xmm5 = 0,0,0,0: the accumulator, lane k (counting from the low lane) becomes autoc[k]
+
+	mov	edx, [esp + 8]			; edx == data_len
+	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
+
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
+	add	eax, 4				; eax = &data[1]
+	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]  (history window; samples before data[0] count as 0)
+	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+.warmup:					; xmm2 == data[sample-3],data[sample-2],data[sample-1],data[sample]
+	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
+	addps	xmm5, xmm0			; xmm5 += the product just formed (data[sample] * window)
+	dec	edx				; one sample consumed
+	jz	.loop_end			; data_len == 1: nothing left to do
+	ALIGN 16
+.loop_start:
+	; start by reading the next sample
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
+	add	eax, 4				; sample++
+	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
+	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
+	movss	xmm2, xmm0			; low lane of the window <- data[sample]; the oldest sample that wrapped around is overwritten
+	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
+	addps	xmm5, xmm0			; xmm5 += the product just formed (data[sample] * window)
+	dec	edx				; one sample consumed
+	jnz	.loop_start
+.loop_end:
+	; store autoc
+	mov	edx, [esp + 16]			; edx == autoc
+	movups	[edx], xmm5			; unaligned 16-byte store: writes autoc[0..3] whatever lag is
+
+.end:						; (no jump in this routine targets this label)
+	ret
+
+	ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
+	;[esp + 16] == autoc[]   (output; 8 floats are always stored, see .loop_end)
+	;[esp + 12] == lag       (never read below; all 8 lanes are always computed)
+	;[esp + 8] == data_len
+	;[esp + 4] == data[]
+	; Accumulates autoc[k] += data[sample] * data[sample-k] for k = 0..7 over all samples; xmm5 holds k = 0..3, xmm6 holds k = 4..7.
+	;ASSERT(lag > 0)
+	;ASSERT(lag <= 8)
+	;ASSERT(lag <= data_len)
+
+	;	for(coeff = 0; coeff < lag; coeff++)
+	;		autoc[coeff] = 0.0;
+	xorps	xmm5, xmm5			; accumulator for autoc[0..3]
+	xorps	xmm6, xmm6			; accumulator for autoc[4..7]
+
+	mov	edx, [esp + 8]			; edx == data_len
+	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
+
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
+	add	eax, 4				; eax = &data[1]
+	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
+	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+	movaps	xmm1, xmm0			; xmm1 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
+.warmup:					; xmm3:xmm2 == data[sample-7],data[sample-6],...,data[sample]
+	mulps	xmm0, xmm2			; lags 0..3
+	mulps	xmm1, xmm3			; xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
+	addps	xmm5, xmm0			; lags 0..3
+	addps	xmm6, xmm1			; xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
+	dec	edx				; one sample consumed
+	jz	.loop_end			; data_len == 1: nothing left to do
+	ALIGN 16
+.loop_start:
+	; start by reading the next sample
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
+	; here we reorder the instructions; see the (#) indexes for a logical order
+	shufps	xmm2, xmm2, 93h			; (3) 93h=2-1-0-3 => xmm2 gets rotated left by one float
+	add	eax, 4				; (0)
+	shufps	xmm3, xmm3, 93h			; (4) 93h=2-1-0-3 => xmm3 gets rotated left by one float
+	shufps	xmm0, xmm0, 0			; (1) xmm0 = data[sample],data[sample],data[sample],data[sample]
+	movss	xmm3, xmm2			; (5) carry the sample that wrapped into xmm2's low lane over to xmm3's low lane (it is now lag 4)
+	movaps	xmm1, xmm0			; (2) xmm1 = data[sample],data[sample],data[sample],data[sample]
+	movss	xmm2, xmm0			; (6) low lane of xmm2 <- data[sample] (lag 0)
+	mulps	xmm1, xmm3			; (8)
+	mulps	xmm0, xmm2			; (7) xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
+	addps	xmm6, xmm1			; (10)
+	addps	xmm5, xmm0			; (9) xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
+	dec	edx				; one sample consumed
+	jnz	.loop_start
+.loop_end:
+	; store autoc
+	mov	edx, [esp + 16]			; edx == autoc
+	movups	[edx], xmm5			; autoc[0..3]
+	movups	[edx + 16], xmm6		; autoc[4..7], written whatever lag is
+
+.end:						; (no jump in this routine targets this label)
+	ret
+
+	ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
+	;[esp + 16] == autoc[]   (output; 12 floats are always stored, see .loop_end)
+	;[esp + 12] == lag       (never read below; all 12 lanes are always computed)
+	;[esp + 8] == data_len
+	;[esp + 4] == data[]
+	; Accumulates autoc[k] += data[sample] * data[sample-k] for k = 0..11; xmm5 holds k = 0..3, xmm6 k = 4..7, xmm7 k = 8..11.
+	;ASSERT(lag > 0)
+	;ASSERT(lag <= 12)
+	;ASSERT(lag <= data_len)
+
+	;	for(coeff = 0; coeff < lag; coeff++)
+	;		autoc[coeff] = 0.0;
+	xorps	xmm5, xmm5			; accumulator for autoc[0..3]
+	xorps	xmm6, xmm6			; accumulator for autoc[4..7]
+	xorps	xmm7, xmm7			; accumulator for autoc[8..11]
+
+	mov	edx, [esp + 8]			; edx == data_len
+	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
+
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
+	add	eax, 4				; eax = &data[1]
+	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
+	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
+	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
+	xorps	xmm4, xmm4			; xmm4 = 0,0,0,0
+.warmup:					; xmm4:xmm3:xmm2 == data[sample-11],data[sample-10],...,data[sample]
+	movaps	xmm1, xmm0			; xmm1 is a scratch copy because mulps overwrites its destination
+	mulps	xmm1, xmm2			; lags 0..3
+	addps	xmm5, xmm1
+	movaps	xmm1, xmm0
+	mulps	xmm1, xmm3			; lags 4..7
+	addps	xmm6, xmm1
+	mulps	xmm0, xmm4			; lags 8..11 (xmm0 itself may be clobbered on its last use)
+	addps	xmm7, xmm0			; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
+	dec	edx				; one sample consumed
+	jz	.loop_end			; data_len == 1: nothing left to do
+	ALIGN 16
+.loop_start:
+	; start by reading the next sample
+	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
+	add	eax, 4				; sample++
+	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
+
+	; shift xmm4:xmm3:xmm2 left by one float
+	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
+	shufps	xmm3, xmm3, 93h			; 93h=2-1-0-3 => xmm3 gets rotated left by one float
+	shufps	xmm4, xmm4, 93h			; 93h=2-1-0-3 => xmm4 gets rotated left by one float
+	movss	xmm4, xmm3			; low lane of xmm4 <- the sample that wrapped around in xmm3 (now lag 8)
+	movss	xmm3, xmm2			; low lane of xmm3 <- the sample that wrapped around in xmm2 (now lag 4)
+	movss	xmm2, xmm0			; low lane of xmm2 <- data[sample] (lag 0)
+
+	; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
+	movaps	xmm1, xmm0
+	mulps	xmm1, xmm2			; lags 0..3
+	addps	xmm5, xmm1
+	movaps	xmm1, xmm0
+	mulps	xmm1, xmm3			; lags 4..7
+	addps	xmm6, xmm1
+	mulps	xmm0, xmm4			; lags 8..11
+	addps	xmm7, xmm0
+
+	dec	edx				; one sample consumed
+	jnz	.loop_start
+.loop_end:
+	; store autoc
+	mov	edx, [esp + 16]			; edx == autoc
+	movups	[edx], xmm5			; autoc[0..3]
+	movups	[edx + 16], xmm6		; autoc[4..7]
+	movups	[edx + 32], xmm7		; autoc[8..11], written whatever lag is
+
+.end:						; (no jump in this routine targets this label)
+	ret
+
+	ALIGN 16
+cident FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow
+	;[ebp + 32] autoc
+	;[ebp + 28] lag
+	;[ebp + 24] data_len
+	;[ebp + 20] data
+	; Sums data[i] * data[i+j] into a zeroed scratch array on the stack, then copies its first lag entries to autoc.
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	ebp, esp			; ebp anchors the frame: esp is realigned and lowered below, and restored from ebp at exit
+
+	mov	esi, [ebp + 20]			; esi = data
+	mov	edi, [ebp + 24]			; edi = data_len
+	mov	edx, [ebp + 28]			; edx = lag
+	inc	edx
+	and	edx, byte -2			; edx = lag rounded up to an even count (the main loop handles two lags per MMX register)
+	mov	eax, edx
+	neg	eax				; eax = -edx
+	and	esp, byte -8			; 8-byte align esp (the scratch array is accessed with movq below)
+	lea	esp, [esp + 4 * eax]		; reserve edx dwords of scratch; scratch[j] lives at [esp + 4*j]
+	mov	ecx, edx
+	xor	eax, eax
+.loop0:						; zero scratch[edx-1 .. 0]
+	dec	ecx
+	mov	[esp + 4 * ecx], eax		; mov leaves the flags from dec intact for the jnz
+	jnz	short .loop0
+
+	mov	eax, edi
+	sub	eax, edx			; eax = data_len - edx
+	mov	ebx, edx
+	and	ebx, byte 1			; edx is even after the rounding above, so ebx = 0 here
+	sub	eax, ebx
+	lea	ecx, [esi + 4 * eax - 12]	; ecx = &data[eax - 3]: end bound for the 4-samples-per-pass loop
+	cmp	esi, ecx
+	mov	eax, esi			; eax = &data[i] with i = 0 (mov does not disturb the flags of the cmp)
+	ja	short .loop2_pre		; data already beyond the bound: only the scalar loop runs
+	ALIGN	16		;4 nops
+.loop1_i:					; one pass handles samples i .. i+3
+	movd	mm0, [eax]			; data[i]
+	movd	mm2, [eax + 4]			; data[i+1]
+	movd	mm4, [eax + 8]			; data[i+2]
+	movd	mm6, [eax + 12]			; data[i+3]
+	mov	ebx, edx			; ebx = j, stepping down by 2 from edx
+	punpckldq	mm0, mm0		; duplicate each sample into both halves of its register
+	punpckldq	mm2, mm2
+	punpckldq	mm4, mm4
+	punpckldq	mm6, mm6
+	ALIGN	16		;3 nops
+.loop1_j:					; one pass handles lags j and j+1
+	sub	ebx, byte 2			; sets the flags tested by jg at the bottom; the MMX/3DNow! instructions in between do not touch EFLAGS
+	movd	mm1, [eax + 4 * ebx]		; data[i+j]
+	movd	mm3, [eax + 4 * ebx + 4]	; data[i+j+1]
+	movd	mm5, [eax + 4 * ebx + 8]	; data[i+j+2]
+	movd	mm7, [eax + 4 * ebx + 12]	; data[i+j+3]
+	punpckldq	mm1, mm3		; mm1 = data[i+j+1]:data[i+j]
+	punpckldq	mm3, mm5		; mm3 = data[i+j+2]:data[i+j+1]
+	pfmul	mm1, mm0			; * data[i]
+	punpckldq	mm5, mm7		; mm5 = data[i+j+3]:data[i+j+2]
+	pfmul	mm3, mm2			; * data[i+1]
+	punpckldq	mm7, [eax + 4 * ebx + 16]	; mm7 = data[i+j+4]:data[i+j+3]
+	pfmul	mm5, mm4			; * data[i+2]
+	pfmul	mm7, mm6			; * data[i+3]
+	pfadd	mm1, mm3
+	movq	mm3, [esp + 4 * ebx]		; mm3 = scratch[j+1]:scratch[j]
+	pfadd	mm5, mm7
+	pfadd	mm1, mm5			; mm1 = the four products summed, for lags j+1:j
+	pfadd	mm3, mm1
+	movq	[esp + 4 * ebx], mm3		; scratch[j], scratch[j+1] updated
+	jg	short .loop1_j			; repeat while j > 0
+
+	add	eax, byte 16			; i += 4
+	cmp	eax, ecx
+	jb	short .loop1_i
+
+.loop2_pre:					; scalar loop for the samples the loop above did not take
+	mov	ebx, eax			; ebx = &data[i]
+	sub	eax, esi
+	shr	eax, 2				; eax = i, the index of the next unprocessed sample
+	lea	ecx, [esi + 4 * edi]		; ecx = &data[data_len]
+	mov	esi, ebx			; esi = &data[i]
+.loop2_i:
+	movd	mm0, [esi]			; data[i]
+	mov	ebx, edi
+	sub	ebx, eax			; ebx = data_len - i = samples left including this one
+	cmp	ebx, edx
+	jbe	short .loop2_j
+	mov	ebx, edx			; ebx = min(samples left, edx): keeps data[i+j] inside the array
+.loop2_j:
+	dec	ebx				; j--, and sets the flags for the jnz below
+	movd	mm1, [esi + 4 * ebx]		; data[i+j]
+	pfmul	mm1, mm0			; data[i] * data[i+j] (only the low float is used)
+	movd	mm2, [esp + 4 * ebx]		; scratch[j]
+	pfadd	mm1, mm2
+	movd	[esp + 4 * ebx], mm1		; scratch[j] += data[i] * data[i+j]
+
+	jnz	short .loop2_j
+
+	add	esi, byte 4
+	inc	eax				; i++
+	cmp	esi, ecx
+	jnz	short .loop2_i			; until esi reaches &data[data_len]
+
+	mov	edi, [ebp + 32]			; edi = autoc
+	mov	edx, [ebp + 28]			; edx = lag (the unrounded value)
+.loop3:						; copy scratch[lag-1 .. 0] to autoc as raw dwords
+	dec	edx
+	mov	eax, [esp + 4 * edx]
+	mov	[edi + 4 * edx], eax
+	jnz	short .loop3
+
+	femms					; leave MMX/3DNow! state before returning
+
+	mov	esp, ebp			; drop the scratch array and undo the alignment
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+;
+;	for(i = 0; i < data_len; i++) {
+;		sum = 0;
+;		for(j = 0; j < order; j++)
+;			sum += qlp_coeff[j] * data[i-j-1];
+;		residual[i] = data[i] - (sum >> lp_quantization);
+;	}
+;
+	ALIGN	16
+cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
+	;[esp + 40]	residual[]
+	;[esp + 36]	lp_quantization
+	;[esp + 32]	order
+	;[esp + 28]	qlp_coeff[]
+	;[esp + 24]	data_len
+	;[esp + 20]	data[]
+	; (offsets above are valid after the four pushes below) Three code paths: order <= 1, order <= 32 (unrolled), order > 32 (generic).
+	;ASSERT(order > 0)
+
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+
+	mov	esi, [esp + 20]			; esi = data[]
+	mov	edi, [esp + 40]			; edi = residual[]
+	mov	eax, [esp + 32]			; eax = order
+	mov	ebx, [esp + 24]			; ebx = data_len
+
+	test	ebx, ebx
+	jz	near .end			; do nothing if data_len == 0
+.begin:						; the MMX variant also jumps here (same stack frame, esi/edi/eax/ebx set up) for a leftover sample
+	cmp	eax, byte 1
+	jg	short .i_1more			; order > 1
+
+	mov	ecx, [esp + 28]
+	mov	edx, [ecx]			; edx = qlp_coeff[0]
+	mov	eax, [esi - 4]			; eax = data[-1]
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	ALIGN	16
+.i_1_loop_i:					; order 1: residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization)
+	imul	eax, edx			; eax = qlp_coeff[0] * data[i-1]
+	sar	eax, cl
+	neg	eax
+	add	eax, [esi]			; eax = data[i] - (sum >> lp_quantization)
+	mov	[edi], eax			; residual[i]
+	mov	eax, [esi]			; eax = data[i], which is data[i-1] for the next pass
+	add	edi, byte 4
+	add	esi, byte 4
+	dec	ebx
+	jnz	.i_1_loop_i
+
+	jmp	.end
+
+.i_1more:
+	cmp	eax, byte 32			; for order <= 32 there is a faster routine
+	jbe	short .i_32
+
+	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
+	ALIGN 16
+.i_32more_loop_i:
+	xor	ebp, ebp			; ebp = sum = 0
+	mov	ecx, [esp + 32]			; ecx = order
+	mov	edx, ecx
+	shl	edx, 2
+	add	edx, [esp + 28]			; edx = &qlp_coeff[order]
+	neg	ecx				; ecx = -order, counts up to 0
+	ALIGN	16
+.i_32more_loop_j:				; walks qlp_coeff[] downwards while data[] goes from data[i-order] up to data[i-1]
+	sub	edx, byte 4
+	mov	eax, [edx]
+	imul	eax, [esi + 4 * ecx]
+	add	ebp, eax
+	inc	ecx
+	jnz	short .i_32more_loop_j
+
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	sar	ebp, cl
+	neg	ebp
+	add	ebp, [esi]			; ebp = data[i] - (sum >> lp_quantization)
+	mov	[edi], ebp			; residual[i]
+	add	esi, byte 4
+	add	edi, byte 4
+
+	dec	ebx
+	jnz	.i_32more_loop_i
+
+	jmp	.end
+
+.i_32:						; 1 < order <= 32: jump into the unrolled chain below so that exactly order terms execute
+	sub	edi, esi			; edi = residual - data, so [edi + esi] addresses residual[i]
+	neg	eax				; eax = -order
+	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]	; -9*order + distance: assumes every mov/imul/add group below assembles to 9 bytes
+	call	.get_eip0			; pushes the address of .get_eip0 (position-independent way to read eip)
+.get_eip0:
+	pop	eax				; eax = address of .get_eip0
+	add	edx, eax			; edx = .jumper_0 - 9*order
+	inc	edx				; the final group is one byte shorter (no displacement on [eax]), so start one byte later
+	mov	eax, [esp + 28]			; eax = qlp_coeff[]
+	xor	ebp, ebp			; ebp = sum = 0
+	jmp	edx				; WATCHOUT: do not change the encoding size of any instruction from here to .jumper_0
+
+	mov	ecx, [eax + 124]		; ecx =  qlp_coeff[31]
+	imul	ecx, [esi - 128]		; ecx =  qlp_coeff[31] * data[i-32]
+	add	ebp, ecx			; sum += qlp_coeff[31] * data[i-32]   (the groups below repeat this for descending coefficients)
+	mov	ecx, [eax + 120]
+	imul	ecx, [esi - 124]
+	add	ebp, ecx
+	mov	ecx, [eax + 116]
+	imul	ecx, [esi - 120]
+	add	ebp, ecx
+	mov	ecx, [eax + 112]
+	imul	ecx, [esi - 116]
+	add	ebp, ecx
+	mov	ecx, [eax + 108]
+	imul	ecx, [esi - 112]
+	add	ebp, ecx
+	mov	ecx, [eax + 104]
+	imul	ecx, [esi - 108]
+	add	ebp, ecx
+	mov	ecx, [eax + 100]
+	imul	ecx, [esi - 104]
+	add	ebp, ecx
+	mov	ecx, [eax + 96]
+	imul	ecx, [esi - 100]
+	add	ebp, ecx
+	mov	ecx, [eax + 92]
+	imul	ecx, [esi - 96]
+	add	ebp, ecx
+	mov	ecx, [eax + 88]
+	imul	ecx, [esi - 92]
+	add	ebp, ecx
+	mov	ecx, [eax + 84]
+	imul	ecx, [esi - 88]
+	add	ebp, ecx
+	mov	ecx, [eax + 80]
+	imul	ecx, [esi - 84]
+	add	ebp, ecx
+	mov	ecx, [eax + 76]
+	imul	ecx, [esi - 80]
+	add	ebp, ecx
+	mov	ecx, [eax + 72]
+	imul	ecx, [esi - 76]
+	add	ebp, ecx
+	mov	ecx, [eax + 68]
+	imul	ecx, [esi - 72]
+	add	ebp, ecx
+	mov	ecx, [eax + 64]
+	imul	ecx, [esi - 68]
+	add	ebp, ecx
+	mov	ecx, [eax + 60]
+	imul	ecx, [esi - 64]
+	add	ebp, ecx
+	mov	ecx, [eax + 56]
+	imul	ecx, [esi - 60]
+	add	ebp, ecx
+	mov	ecx, [eax + 52]
+	imul	ecx, [esi - 56]
+	add	ebp, ecx
+	mov	ecx, [eax + 48]
+	imul	ecx, [esi - 52]
+	add	ebp, ecx
+	mov	ecx, [eax + 44]
+	imul	ecx, [esi - 48]
+	add	ebp, ecx
+	mov	ecx, [eax + 40]
+	imul	ecx, [esi - 44]
+	add	ebp, ecx
+	mov	ecx, [eax + 36]
+	imul	ecx, [esi - 40]
+	add	ebp, ecx
+	mov	ecx, [eax + 32]
+	imul	ecx, [esi - 36]
+	add	ebp, ecx
+	mov	ecx, [eax + 28]
+	imul	ecx, [esi - 32]
+	add	ebp, ecx
+	mov	ecx, [eax + 24]
+	imul	ecx, [esi - 28]
+	add	ebp, ecx
+	mov	ecx, [eax + 20]
+	imul	ecx, [esi - 24]
+	add	ebp, ecx
+	mov	ecx, [eax + 16]
+	imul	ecx, [esi - 20]
+	add	ebp, ecx
+	mov	ecx, [eax + 12]
+	imul	ecx, [esi - 16]
+	add	ebp, ecx
+	mov	ecx, [eax + 8]
+	imul	ecx, [esi - 12]
+	add	ebp, ecx
+	mov	ecx, [eax + 4]
+	imul	ecx, [esi - 8]
+	add	ebp, ecx
+	mov	ecx, [eax]			; there is one byte missing (no displacement byte; this is what the inc edx above compensates for)
+	imul	ecx, [esi - 4]			; ecx =  qlp_coeff[0] * data[i-1]
+	add	ebp, ecx			; sum += qlp_coeff[0] * data[i-1]
+.jumper_0:
+
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	sar	ebp, cl				; ebp = (sum >> lp_quantization)
+	neg	ebp
+	add	ebp, [esi]			; ebp = data[i] - (sum >> lp_quantization)
+	mov	[edi + esi], ebp		; residual[i] (edi holds residual - data)
+	add	esi, byte 4
+
+	dec	ebx
+	jz	short .end
+	xor	ebp, ebp			; sum = 0 for the next sample
+	jmp	edx				; re-enter the unrolled chain at the same entry point
+
+.end:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
+; the channel and qlp_coeffs must be <= 16.  Especially note that this routine
+; cannot be used for side-channel coded 16bps channels since the effective bps
+; is 17.
+	ALIGN	16
+cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
+	;[esp + 40]	residual[]
+	;[esp + 36]	lp_quantization
+	;[esp + 32]	order
+	;[esp + 28]	qlp_coeff[]
+	;[esp + 24]	data_len
+	;[esp + 20]	data[]
+	; Produces two residuals per pass with pmaddwd on 16-bit words; a single leftover sample is handed to the scalar routine.
+	;ASSERT(order > 0)
+
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+
+	mov	esi, [esp + 20]			; esi = data[]
+	mov	edi, [esp + 40]			; edi = residual[]
+	mov	eax, [esp + 32]			; eax = order
+	mov	ebx, [esp + 24]			; ebx = data_len
+
+	test	ebx, ebx
+	jz	near .end			; do nothing if data_len == 0
+	dec	ebx				; ebx = data_len - 1
+	test	ebx, ebx
+	jz	near .last_one			; a single sample: leave it to the scalar routine
+
+	mov	edx, [esp + 28]			; edx = qlp_coeff[]
+	movd	mm6, [esp + 36]			; mm6 = 0:lp_quantization
+	mov	ebp, esp			; remember esp; it is restored at .mmx_end
+
+	and	esp, 0xfffffff8			; 8-byte align the stack before building the coefficient table on it
+
+	xor	ecx, ecx
+.copy_qlp_loop:					; push the low 16 bits of each coefficient, qlp_coeff[0] first (it ends up at the highest address)
+	push	word [edx + 4 * ecx]
+	inc	ecx
+	cmp	ecx, eax
+	jnz	short .copy_qlp_loop
+
+	and	ecx, 0x3			; ecx = order mod 4
+	test	ecx, ecx
+	je	short .za_end			; already a multiple of 4: no padding
+	sub	ecx, byte 4			; ecx = -(number of zero words still needed)
+.za_loop:					; pad the table with zero words up to a multiple of 4
+	push	word 0
+	inc	eax				; eax ends as order rounded up to a multiple of 4
+	inc	ecx
+	jnz	short .za_loop
+.za_end:
+
+	movq	mm5, [esp + 2 * eax - 8]	; mm5 = the first four words pushed: coefficients 3..0 (zero where padded), qlp_coeff[0] in the top word
+	movd	mm4, [esi - 16]			; data[-4]
+	punpckldq	mm4, [esi - 12]		; mm4 = data[-3]:data[-4]
+	movd	mm0, [esi - 8]			; data[-2]
+	punpckldq	mm0, [esi - 4]		; mm0 = data[-1]:data[-2]
+	packssdw	mm4, mm0		; mm4 = data[-4..-1] as signed-saturated words, data[-1] in the top word
+
+	cmp	eax, byte 4
+	jnbe	short .mmx_4more		; padded order > 4 needs the inner loop over further coefficient groups
+
+	ALIGN	16
+.mmx_4_loop_i:					; padded order == 4: samples i and i+1 per pass
+	movd	mm1, [esi]			; data[i]
+	movq	mm3, mm4			; mm3 = history window for sample i
+	punpckldq	mm1, [esi + 4]		; mm1 = data[i+1]:data[i]
+	psrlq	mm4, 16				; drop the oldest word of the window
+	movq	mm0, mm1
+	psllq	mm0, 48				; low word of data[i] moved to the top word
+	por	mm4, mm0			; window now ends with data[i]
+	movq	mm2, mm4			; mm2 = history window for sample i+1
+	psrlq	mm4, 16				; drop the oldest word again
+	pxor	mm0, mm0
+	punpckhdq	mm0, mm1		; mm0 = data[i+1]:0
+	pmaddwd	mm3, mm5			; two partial dot products for sample i
+	pmaddwd	mm2, mm5			; two partial dot products for sample i+1
+	psllq	mm0, 16				; low word of data[i+1] moved to the top word
+	por	mm4, mm0			; mm4 = window for the next pass
+	movq	mm0, mm3
+	punpckldq	mm3, mm2		; mm3 = low partials of (i+1):(i)
+	punpckhdq	mm0, mm2		; mm0 = high partials of (i+1):(i)
+	paddd	mm3, mm0			; mm3 = sum(i+1):sum(i)
+	psrad	mm3, mm6			; >> lp_quantization
+	psubd	mm1, mm3			; data - (sum >> lp_quantization)
+	movd	[edi], mm1			; residual[i]
+	punpckhdq	mm1, mm1
+	movd	[edi + 4], mm1			; residual[i+1]
+
+	add	edi, byte 8
+	add	esi, byte 8
+
+	sub	ebx, 2
+	jg	.mmx_4_loop_i			; leaves ebx at 0 (one sample left) or -1 (none left)
+	jmp	.mmx_end
+
+.mmx_4more:
+	shl	eax, 2
+	neg	eax
+	add	eax, byte 16			; eax = 16 - 4*(padded order): negative byte offset from &data[i]
+
+	ALIGN	16
+.mmx_4more_loop_i:				; same window handling as .mmx_4_loop_i for coefficients 0..3
+	movd	mm1, [esi]			; data[i]
+	punpckldq	mm1, [esi + 4]		; mm1 = data[i+1]:data[i]
+	movq	mm3, mm4			; mm3 = history window for sample i
+	psrlq	mm4, 16
+	movq	mm0, mm1
+	psllq	mm0, 48
+	por	mm4, mm0
+	movq	mm2, mm4			; mm2 = history window for sample i+1
+	psrlq	mm4, 16
+	pxor	mm0, mm0
+	punpckhdq	mm0, mm1
+	pmaddwd	mm3, mm5
+	pmaddwd	mm2, mm5
+	psllq	mm0, 16
+	por	mm4, mm0			; mm4 = window for the next pass
+
+	mov	ecx, esi
+	add	ecx, eax			; ecx = esi + eax: start of the older history, advanced by 16 bytes per inner pass until it reaches esi
+	mov	edx, esp			; edx = start of the word table (highest-index coefficients first)
+
+	ALIGN	16
+.mmx_4more_loop_j:				; add the contribution of one further group of 4 coefficients to both sums
+	movd	mm0, [ecx - 16]
+	movd	mm7, [ecx - 8]
+	punpckldq	mm0, [ecx - 12]
+	punpckldq	mm7, [ecx - 4]
+	packssdw	mm0, mm7		; four samples at ecx-16 .. ecx-4 as words
+	pmaddwd	mm0, [edx]
+	punpckhdq	mm7, mm7		; keep the dword from [ecx - 4] in the low half for the shifted window
+	paddd	mm3, mm0			; accumulate for sample i
+	movd	mm0, [ecx - 12]
+	punpckldq	mm0, [ecx - 8]
+	punpckldq	mm7, [ecx]
+	packssdw	mm0, mm7		; the same window moved one sample later: ecx-12 .. ecx
+	pmaddwd	mm0, [edx]
+	paddd	mm2, mm0			; accumulate for sample i+1
+
+	add	edx, byte 8			; next group of 4 coefficient words
+	add	ecx, byte 16
+	cmp	ecx, esi
+	jnz	.mmx_4more_loop_j
+
+	movq	mm0, mm3
+	punpckldq	mm3, mm2
+	punpckhdq	mm0, mm2
+	paddd	mm3, mm0			; mm3 = sum(i+1):sum(i)
+	psrad	mm3, mm6			; >> lp_quantization
+	psubd	mm1, mm3			; data - (sum >> lp_quantization)
+	movd	[edi], mm1			; residual[i]
+	punpckhdq	mm1, mm1
+	movd	[edi + 4], mm1			; residual[i+1]
+
+	add	edi, byte 8
+	add	esi, byte 8
+
+	sub	ebx, 2
+	jg	near .mmx_4more_loop_i		; leaves ebx at 0 (one sample left) or -1 (none left)
+
+.mmx_end:
+	emms					; leave MMX state
+	mov	esp, ebp			; discard the coefficient table and undo the alignment
+.last_one:
+	mov	eax, [esp + 32]			; eax = order again (it was rounded up / rescaled above)
+	inc	ebx				; 1 if a sample is left, 0 otherwise
+	jnz	near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin	; the scalar routine finishes it and pops this (identical) frame at its own .end
+
+.end:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+; **********************************************************************
+;
+; void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+; {
+; 	unsigned i, j;
+; 	FLAC__int32 sum;
+;
+; 	FLAC__ASSERT(order > 0);
+;
+; 	for(i = 0; i < data_len; i++) {
+; 		sum = 0;
+; 		for(j = 0; j < order; j++)
+; 			sum += qlp_coeff[j] * data[i-j-1];
+; 		data[i] = residual[i] + (sum >> lp_quantization);
+; 	}
+; }
+	ALIGN	16
+cident FLAC__lpc_restore_signal_asm_ia32
+	;[esp + 40]	data[]
+	;[esp + 36]	lp_quantization
+	;[esp + 32]	order
+	;[esp + 28]	qlp_coeff[]
+	;[esp + 24]	data_len
+	;[esp + 20]	residual[]
+	; (offsets above are valid after the four pushes below) Three code paths: order <= 1, order <= 32 (unrolled), order > 32 (generic).
+	;ASSERT(order > 0)
+
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+
+	mov	esi, [esp + 20]			; esi = residual[]
+	mov	edi, [esp + 40]			; edi = data[]
+	mov	eax, [esp + 32]			; eax = order
+	mov	ebx, [esp + 24]			; ebx = data_len
+
+	test	ebx, ebx
+	jz	near .end			; do nothing if data_len == 0
+
+.begin:						; the MMX variant also jumps here when order < 4 (same stack frame, esi/edi/eax/ebx already set up)
+	cmp	eax, byte 1
+	jg	short .x87_1more		; order > 1 (despite the x87 in the label names, only integer instructions are used)
+
+	mov	ecx, [esp + 28]
+	mov	edx, [ecx]			; edx = qlp_coeff[0]
+	mov	eax, [edi - 4]			; eax = data[-1]
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	ALIGN	16
+.x87_1_loop_i:					; order 1: data[i] = residual[i] + ((qlp_coeff[0] * data[i-1]) >> lp_quantization)
+	imul	eax, edx			; eax = qlp_coeff[0] * data[i-1]
+	sar	eax, cl
+	add	eax, [esi]			; eax = residual[i] + (sum >> lp_quantization)
+	mov	[edi], eax			; data[i]; eax stays live as data[i-1] for the next pass
+	add	esi, byte 4
+	add	edi, byte 4
+	dec	ebx
+	jnz	.x87_1_loop_i
+
+	jmp	.end
+
+.x87_1more:
+	cmp	eax, byte 32			; for order <= 32 there is a faster routine
+	jbe	short .x87_32
+
+	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
+	ALIGN 16
+.x87_32more_loop_i:
+	xor	ebp, ebp			; ebp = sum = 0
+	mov	ecx, [esp + 32]			; ecx = order
+	mov	edx, ecx
+	shl	edx, 2
+	add	edx, [esp + 28]			; edx = &qlp_coeff[order]
+	neg	ecx				; ecx = -order, counts up to 0
+	ALIGN	16
+.x87_32more_loop_j:				; walks qlp_coeff[] downwards while data[] goes from data[i-order] up to data[i-1]
+	sub	edx, byte 4
+	mov	eax, [edx]
+	imul	eax, [edi + 4 * ecx]
+	add	ebp, eax
+	inc	ecx
+	jnz	short .x87_32more_loop_j
+
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	sar	ebp, cl
+	add	ebp, [esi]			; ebp = residual[i] + (sum >> lp_quantization)
+	mov	[edi], ebp			; data[i]
+	add	edi, byte 4
+	add	esi, byte 4
+
+	dec	ebx
+	jnz	.x87_32more_loop_i
+
+	jmp	.end
+
+.x87_32:					; 1 < order <= 32: jump into the unrolled chain below so that exactly order terms execute
+	sub	esi, edi			; esi = residual - data, so [esi + edi] addresses residual[i]
+	neg	eax				; eax = -order
+	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]	; -9*order + distance: assumes every mov/imul/add group below assembles to 9 bytes
+	call	.get_eip0			; pushes the address of .get_eip0 (position-independent way to read eip)
+.get_eip0:
+	pop	eax				; eax = address of .get_eip0
+	add	edx, eax			; edx = .jumper_0 - 9*order
+	inc	edx				; compensate for the shorter opcode on the last iteration
+	mov	eax, [esp + 28]			; eax = qlp_coeff[]
+	xor	ebp, ebp			; ebp = sum = 0
+	jmp	edx				; WATCHOUT: do not change the encoding size of any instruction from here to .jumper_0
+
+	mov	ecx, [eax + 124]		; ecx =  qlp_coeff[31]
+	imul	ecx, [edi - 128]		; ecx =  qlp_coeff[31] * data[i-32]
+	add	ebp, ecx			; sum += qlp_coeff[31] * data[i-32]
+	mov	ecx, [eax + 120]		; ecx =  qlp_coeff[30]
+	imul	ecx, [edi - 124]		; ecx =  qlp_coeff[30] * data[i-31]
+	add	ebp, ecx			; sum += qlp_coeff[30] * data[i-31]
+	mov	ecx, [eax + 116]		; ecx =  qlp_coeff[29]
+	imul	ecx, [edi - 120]		; ecx =  qlp_coeff[29] * data[i-30]
+	add	ebp, ecx			; sum += qlp_coeff[29] * data[i-30]
+	mov	ecx, [eax + 112]		; ecx =  qlp_coeff[28]
+	imul	ecx, [edi - 116]		; ecx =  qlp_coeff[28] * data[i-29]
+	add	ebp, ecx			; sum += qlp_coeff[28] * data[i-29]
+	mov	ecx, [eax + 108]		; ecx =  qlp_coeff[27]
+	imul	ecx, [edi - 112]		; ecx =  qlp_coeff[27] * data[i-28]
+	add	ebp, ecx			; sum += qlp_coeff[27] * data[i-28]
+	mov	ecx, [eax + 104]		; ecx =  qlp_coeff[26]
+	imul	ecx, [edi - 108]		; ecx =  qlp_coeff[26] * data[i-27]
+	add	ebp, ecx			; sum += qlp_coeff[26] * data[i-27]
+	mov	ecx, [eax + 100]		; ecx =  qlp_coeff[25]
+	imul	ecx, [edi - 104]		; ecx =  qlp_coeff[25] * data[i-26]
+	add	ebp, ecx			; sum += qlp_coeff[25] * data[i-26]
+	mov	ecx, [eax + 96]			; ecx =  qlp_coeff[24]
+	imul	ecx, [edi - 100]		; ecx =  qlp_coeff[24] * data[i-25]
+	add	ebp, ecx			; sum += qlp_coeff[24] * data[i-25]
+	mov	ecx, [eax + 92]			; ecx =  qlp_coeff[23]
+	imul	ecx, [edi - 96]			; ecx =  qlp_coeff[23] * data[i-24]
+	add	ebp, ecx			; sum += qlp_coeff[23] * data[i-24]
+	mov	ecx, [eax + 88]			; ecx =  qlp_coeff[22]
+	imul	ecx, [edi - 92]			; ecx =  qlp_coeff[22] * data[i-23]
+	add	ebp, ecx			; sum += qlp_coeff[22] * data[i-23]
+	mov	ecx, [eax + 84]			; ecx =  qlp_coeff[21]
+	imul	ecx, [edi - 88]			; ecx =  qlp_coeff[21] * data[i-22]
+	add	ebp, ecx			; sum += qlp_coeff[21] * data[i-22]
+	mov	ecx, [eax + 80]			; ecx =  qlp_coeff[20]
+	imul	ecx, [edi - 84]			; ecx =  qlp_coeff[20] * data[i-21]
+	add	ebp, ecx			; sum += qlp_coeff[20] * data[i-21]
+	mov	ecx, [eax + 76]			; ecx =  qlp_coeff[19]
+	imul	ecx, [edi - 80]			; ecx =  qlp_coeff[19] * data[i-20]
+	add	ebp, ecx			; sum += qlp_coeff[19] * data[i-20]
+	mov	ecx, [eax + 72]			; ecx =  qlp_coeff[18]
+	imul	ecx, [edi - 76]			; ecx =  qlp_coeff[18] * data[i-19]
+	add	ebp, ecx			; sum += qlp_coeff[18] * data[i-19]
+	mov	ecx, [eax + 68]			; ecx =  qlp_coeff[17]
+	imul	ecx, [edi - 72]			; ecx =  qlp_coeff[17] * data[i-18]
+	add	ebp, ecx			; sum += qlp_coeff[17] * data[i-18]
+	mov	ecx, [eax + 64]			; ecx =  qlp_coeff[16]
+	imul	ecx, [edi - 68]			; ecx =  qlp_coeff[16] * data[i-17]
+	add	ebp, ecx			; sum += qlp_coeff[16] * data[i-17]
+	mov	ecx, [eax + 60]			; ecx =  qlp_coeff[15]
+	imul	ecx, [edi - 64]			; ecx =  qlp_coeff[15] * data[i-16]
+	add	ebp, ecx			; sum += qlp_coeff[15] * data[i-16]
+	mov	ecx, [eax + 56]			; ecx =  qlp_coeff[14]
+	imul	ecx, [edi - 60]			; ecx =  qlp_coeff[14] * data[i-15]
+	add	ebp, ecx			; sum += qlp_coeff[14] * data[i-15]
+	mov	ecx, [eax + 52]			; ecx =  qlp_coeff[13]
+	imul	ecx, [edi - 56]			; ecx =  qlp_coeff[13] * data[i-14]
+	add	ebp, ecx			; sum += qlp_coeff[13] * data[i-14]
+	mov	ecx, [eax + 48]			; ecx =  qlp_coeff[12]
+	imul	ecx, [edi - 52]			; ecx =  qlp_coeff[12] * data[i-13]
+	add	ebp, ecx			; sum += qlp_coeff[12] * data[i-13]
+	mov	ecx, [eax + 44]			; ecx =  qlp_coeff[11]
+	imul	ecx, [edi - 48]			; ecx =  qlp_coeff[11] * data[i-12]
+	add	ebp, ecx			; sum += qlp_coeff[11] * data[i-12]
+	mov	ecx, [eax + 40]			; ecx =  qlp_coeff[10]
+	imul	ecx, [edi - 44]			; ecx =  qlp_coeff[10] * data[i-11]
+	add	ebp, ecx			; sum += qlp_coeff[10] * data[i-11]
+	mov	ecx, [eax + 36]			; ecx =  qlp_coeff[ 9]
+	imul	ecx, [edi - 40]			; ecx =  qlp_coeff[ 9] * data[i-10]
+	add	ebp, ecx			; sum += qlp_coeff[ 9] * data[i-10]
+	mov	ecx, [eax + 32]			; ecx =  qlp_coeff[ 8]
+	imul	ecx, [edi - 36]			; ecx =  qlp_coeff[ 8] * data[i- 9]
+	add	ebp, ecx			; sum += qlp_coeff[ 8] * data[i- 9]
+	mov	ecx, [eax + 28]			; ecx =  qlp_coeff[ 7]
+	imul	ecx, [edi - 32]			; ecx =  qlp_coeff[ 7] * data[i- 8]
+	add	ebp, ecx			; sum += qlp_coeff[ 7] * data[i- 8]
+	mov	ecx, [eax + 24]			; ecx =  qlp_coeff[ 6]
+	imul	ecx, [edi - 28]			; ecx =  qlp_coeff[ 6] * data[i- 7]
+	add	ebp, ecx			; sum += qlp_coeff[ 6] * data[i- 7]
+	mov	ecx, [eax + 20]			; ecx =  qlp_coeff[ 5]
+	imul	ecx, [edi - 24]			; ecx =  qlp_coeff[ 5] * data[i- 6]
+	add	ebp, ecx			; sum += qlp_coeff[ 5] * data[i- 6]
+	mov	ecx, [eax + 16]			; ecx =  qlp_coeff[ 4]
+	imul	ecx, [edi - 20]			; ecx =  qlp_coeff[ 4] * data[i- 5]
+	add	ebp, ecx			; sum += qlp_coeff[ 4] * data[i- 5]
+	mov	ecx, [eax + 12]			; ecx =  qlp_coeff[ 3]
+	imul	ecx, [edi - 16]			; ecx =  qlp_coeff[ 3] * data[i- 4]
+	add	ebp, ecx			; sum += qlp_coeff[ 3] * data[i- 4]
+	mov	ecx, [eax + 8]			; ecx =  qlp_coeff[ 2]
+	imul	ecx, [edi - 12]			; ecx =  qlp_coeff[ 2] * data[i- 3]
+	add	ebp, ecx			; sum += qlp_coeff[ 2] * data[i- 3]
+	mov	ecx, [eax + 4]			; ecx =  qlp_coeff[ 1]
+	imul	ecx, [edi - 8]			; ecx =  qlp_coeff[ 1] * data[i- 2]
+	add	ebp, ecx			; sum += qlp_coeff[ 1] * data[i- 2]
+	mov	ecx, [eax]			; ecx =  qlp_coeff[ 0] (NOTE: one byte missing from instruction)
+	imul	ecx, [edi - 4]			; ecx =  qlp_coeff[ 0] * data[i- 1]
+	add	ebp, ecx			; sum += qlp_coeff[ 0] * data[i- 1]
+.jumper_0:
+
+	mov	cl, [esp + 36]			; cl = lp_quantization
+	sar	ebp, cl				; ebp = (sum >> lp_quantization)
+	add	ebp, [esi + edi]		; ebp = residual[i] + (sum >> lp_quantization)
+	mov	[edi], ebp			; data[i] = residual[i] + (sum >> lp_quantization)
+	add	edi, byte 4
+
+	dec	ebx
+	jz	short .end
+	xor	ebp, ebp			; sum = 0 for the next sample
+	jmp	edx				; re-enter the unrolled chain at the same entry point
+
+.end:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
+; the channel and qlp_coeffs must be <= 16.  Especially note that this routine
+; cannot be used for side-channel coded 16bps channels since the effective bps
+; is 17.
+; WATCHOUT: this routine requires that each data array have a buffer of up to
+; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
+; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
+	ALIGN	16
+cident FLAC__lpc_restore_signal_asm_ia32_mmx
+	;[esp + 40]	data[]
+	;[esp + 36]	lp_quantization
+	;[esp + 32]	order
+	;[esp + 28]	qlp_coeff[]
+	;[esp + 24]	data_len
+	;[esp + 20]	residual[]
+	; computes data[i] = residual[i] + (sum(qlp_coeff[j] * data[i-j-1]) >> lp_quantization) using 16-bit MMX multiplies
+	;ASSERT(order > 0)
+
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+
+	mov	esi, [esp + 20]			; esi = residual[]
+	mov	edi, [esp + 40]			; edi = data[]
+	mov	eax, [esp + 32]			; eax = order
+	mov	ebx, [esp + 24]			; ebx = data_len
+
+	test	ebx, ebx
+	jz	near .end			; do nothing if data_len == 0
+	cmp	eax, byte 4
+	jb	near FLAC__lpc_restore_signal_asm_ia32.begin	; order < 4: hand off to FLAC__lpc_restore_signal_asm_ia32
+
+	mov	edx, [esp + 28]			; edx = qlp_coeff[]
+	movd	mm6, [esp + 36]			; mm6 = lp_quantization (shift count for psrad)
+	mov	ebp, esp			; remember esp; it is restored at .mmx_end
+
+	and	esp, 0xfffffff8			; align the stack down to 8 bytes for the coefficient copy
+
+	xor	ecx, ecx			; ecx = coefficient index
+.copy_qlp_loop:
+	push	word [edx + 4 * ecx]		; push the low 16 bits of qlp_coeff[ecx]
+	inc	ecx
+	cmp	ecx, eax
+	jnz	short .copy_qlp_loop		; until all 'order' coefficients are on the stack
+
+	and	ecx, 0x3			; ecx = order % 4
+	test	ecx, ecx
+	je	short .za_end			; order is already a multiple of 4: no padding needed
+	sub	ecx, byte 4			; ecx = -(number of zero words still to push)
+.za_loop:
+	push	word 0				; pad the coefficient copy with a zero
+	inc	eax				; eax ends up as order rounded up to a multiple of 4
+	inc	ecx
+	jnz	short .za_loop
+.za_end:
+
+	movq	mm5, [esp + 2 * eax - 8]	; mm5 = 16-bit qlp_coeff[3], [2], [1], [0] (lowest word first)
+	movd	mm4, [edi - 16]			; mm4 = data[i-4]
+	punpckldq	mm4, [edi - 12]		; mm4 = data[i-4], data[i-3]
+	movd	mm0, [edi - 8]			; mm0 = data[i-2]
+	punpckldq	mm0, [edi - 4]		; mm0 = data[i-2], data[i-1]
+	packssdw	mm4, mm0		; mm4 = data[i-4..i-1] packed to signed 16-bit words (saturating)
+
+	cmp	eax, byte 4
+	jnbe	short .mmx_4more		; padded order > 4: take the general loop
+
+	ALIGN	16
+.mmx_4_loop_i:
+	movq	mm7, mm4
+	pmaddwd	mm7, mm5			; mm7 = two 32-bit partial sums of coeff * history
+	movq	mm0, mm7
+	punpckhdq	mm7, mm7		; copy the high partial sum into both halves
+	paddd	mm7, mm0			; low dword of mm7 = full sum
+	psrad	mm7, mm6			; sum >> lp_quantization (arithmetic)
+	movd	mm1, [esi]			; mm1 = residual[i]
+	paddd	mm7, mm1			; low dword = residual[i] + (sum >> lp_quantization)
+	movd	[edi], mm7			; data[i] = residual[i] + (sum >> lp_quantization)
+	psllq	mm7, 48				; move the low 16 bits of data[i] into the top word
+	psrlq	mm4, 16				; drop the oldest sample from the history window
+	por	mm4, mm7			; mm4 = 16-bit data[i-3] .. data[i] for the next iteration
+
+	add	esi, byte 4			; next residual
+	add	edi, byte 4			; next output sample
+
+	dec	ebx
+	jnz	.mmx_4_loop_i			; loop until data_len samples are done
+	jmp	.mmx_end
+.mmx_4more:
+	shl	eax, 2
+	neg	eax
+	add	eax, byte 16			; eax = 16 - 4 * (padded order), a negative byte offset
+	ALIGN	16
+.mmx_4more_loop_i:
+	mov	ecx, edi
+	add	ecx, eax			; ecx = edi + eax; stepped by 16 until it reaches edi
+	mov	edx, esp			; edx = lowest address of the 16-bit coefficient copy
+
+	movq	mm7, mm4
+	pmaddwd	mm7, mm5			; start the sum with the 4 most recent samples
+
+	ALIGN	16
+.mmx_4more_loop_j:
+	movd	mm0, [ecx - 16]
+	punpckldq	mm0, [ecx - 12]
+	movd	mm1, [ecx - 8]
+	punpckldq	mm1, [ecx - 4]
+	packssdw	mm0, mm1		; mm0 = four older history samples as 16-bit words
+	pmaddwd	mm0, [edx]			; multiply by the four coefficients at edx
+	paddd	mm7, mm0			; accumulate the partial sums
+
+	add	edx, byte 8			; next four coefficients
+	add	ecx, byte 16			; next four samples
+	cmp	ecx, edi
+	jnz	.mmx_4more_loop_j
+
+	movq	mm0, mm7
+	punpckhdq	mm7, mm7		; copy the high partial sum into both halves
+	paddd	mm7, mm0			; low dword of mm7 = full sum
+	psrad	mm7, mm6			; sum >> lp_quantization (arithmetic)
+	movd	mm1, [esi]			; mm1 = residual[i]
+	paddd	mm7, mm1			; low dword = residual[i] + (sum >> lp_quantization)
+	movd	[edi], mm7			; data[i] = residual[i] + (sum >> lp_quantization)
+	psllq	mm7, 48				; move the low 16 bits of data[i] into the top word
+	psrlq	mm4, 16				; drop the oldest sample from the history window
+	por	mm4, mm7			; mm4 = 16-bit data[i-3] .. data[i] for the next iteration
+
+	add	esi, byte 4			; next residual
+	add	edi, byte 4			; next output sample
+
+	dec	ebx
+	jnz	short .mmx_4more_loop_i
+.mmx_end:
+	emms					; clear the MMX state before returning
+	mov	esp, ebp			; discard the coefficient copy and undo the alignment
+
+.end:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+       section .note.GNU-stack noalloc
+%endif
diff --git a/FLAC/ia32/nasm.h b/FLAC/ia32/nasm.h
index 5951fb00ae..df7f30ab88 100644
--- a/FLAC/ia32/nasm.h
+++ b/FLAC/ia32/nasm.h
@@ -1,5 +1,5 @@
 ;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
 ;
 ;  Redistribution and use in source and binary forms, with or without
 ;  modification, are permitted provided that the following conditions
diff --git a/FLAC/ia32/stream_encoder_asm.nasm b/FLAC/ia32/stream_encoder_asm.nasm
new file mode 100644
index 0000000000..8d4ef6f948
--- /dev/null
+++ b/FLAC/ia32/stream_encoder_asm.nasm
@@ -0,0 +1,159 @@
+;  vim:filetype=nasm ts=8
+
+;  libFLAC - Free Lossless Audio Codec library
+;  Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+;
+;  Redistribution and use in source and binary forms, with or without
+;  modification, are permitted provided that the following conditions
+;  are met:
+;
+;  - Redistributions of source code must retain the above copyright
+;  notice, this list of conditions and the following disclaimer.
+;
+;  - Redistributions in binary form must reproduce the above copyright
+;  notice, this list of conditions and the following disclaimer in the
+;  documentation and/or other materials provided with the distribution.
+;
+;  - Neither the name of the Xiph.org Foundation nor the names of its
+;  contributors may be used to endorse or promote products derived from
+;  this software without specific prior written permission.
+;
+;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%include "ia32/nasm.h"
+
+	data_section
+
+cglobal precompute_partition_info_sums_32bit_asm_ia32_
+
+	code_section
+
+
+; **********************************************************************
+;
+; Sums abs(residual[]) per partition at max_partition_order, then adds adjacent pairs for each lower order down to min_partition_order.
+; void precompute_partition_info_sums_32bit_(
+; 	const FLAC__int32 residual[],
+; 	FLAC__uint64 abs_residual_partition_sums[],
+; 	unsigned blocksize,
+; 	unsigned predictor_order,
+; 	unsigned min_partition_order,
+; 	unsigned max_partition_order
+; )
+;
+	ALIGN 16
+cident precompute_partition_info_sums_32bit_asm_ia32_
+
+	;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
+	;; [esp + 4]	const FLAC__int32 residual[]
+	;; [esp + 8]	FLAC__uint64 abs_residual_partition_sums[]
+	;; [esp + 12]	unsigned blocksize
+	;; [esp + 16]	unsigned predictor_order
+	;; [esp + 20]	unsigned min_partition_order
+	;; [esp + 24]	unsigned max_partition_order
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	sub	esp, 8			; reserve two dword locals (see the key below)
+	;; [esp + 28]	const FLAC__int32 residual[]
+	;; [esp + 32]	FLAC__uint64 abs_residual_partition_sums[]
+	;; [esp + 36]	unsigned blocksize
+	;; [esp + 40]	unsigned predictor_order
+	;; [esp + 44]	unsigned min_partition_order
+	;; [esp + 48]	unsigned max_partition_order
+	;; [esp]	partitions
+	;; [esp + 4]	default_partition_samples
+
+	mov	ecx, [esp + 48]		; ecx <- max_partition_order (shift count in cl)
+	mov	eax, 1
+	shl	eax, cl
+	mov	[esp], eax		; [esp] <- partitions = 1u << max_partition_order;
+	mov	eax, [esp + 36]		; eax <- blocksize
+	shr	eax, cl
+	mov	[esp + 4], eax		; [esp + 4] <- default_partition_samples = blocksize >> max_partition_order;
+
+	;
+	; first do max_partition_order
+	;
+	mov	edi, [esp + 4]
+	sub	edi, [esp + 40]		; edi <- end = (unsigned)(-(int)predictor_order) + default_partition_samples
+	xor	esi, esi		; esi <- residual_sample = 0
+	xor	ecx, ecx		; ecx <- partition = 0
+	mov	ebp, [esp + 28]		; ebp <- residual[]
+	xor	ebx, ebx		; ebx <- abs_residual_partition_sum = 0;
+	; note we put the updates to 'end' and 'abs_residual_partition_sum' at the end of loop0 and in the initialization above so we could align loop0 and loop1
+	ALIGN	16
+.loop0:					; for(partition = residual_sample = 0; partition < partitions; partition++) {
+.loop1:					;   for( ; residual_sample < end; residual_sample++)
+	mov	eax, [ebp + esi * 4]	;     eax <- residual[residual_sample]
+	cdq				;     edx <- sign mask of eax (0 or -1)
+	xor	eax, edx
+	sub	eax, edx		;     eax <- abs(eax), computed without a branch
+	add	ebx, eax		;     abs_residual_partition_sum += abs(residual[residual_sample]);
+	;@@@@@@ check overflow flag and abort here?
+	add	esi, byte 1
+	cmp	esi, edi		;   /* since the loop will always run at least once, we can put the loop check down here */
+	jb	.loop1
+.next1:
+	add	edi, [esp + 4]		;   end += default_partition_samples;
+	mov	eax, [esp + 32]
+	mov	[eax + ecx * 8], ebx	;   abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+	mov	[eax + ecx * 8 + 4], dword 0	;   (upper dword of the 64-bit entry is zero: the sum is accumulated in 32 bits)
+	xor	ebx, ebx		;   abs_residual_partition_sum = 0;
+	add	ecx, byte 1
+	cmp	ecx, [esp]		; /* since the loop will always run at least once, we can put the loop check down here */
+	jb	.loop0
+.next0:					; }
+	;
+	; now merge partitions for lower orders
+	;
+	mov	esi, [esp + 32]		; esi <- abs_residual_partition_sums[from_partition==0];
+	mov	eax, [esp]
+	lea	edi, [esi + eax * 8]	; edi <- abs_residual_partition_sums[to_partition==partitions];
+	mov	ecx, [esp + 48]
+	sub	ecx, byte 1		; ecx <- partition_order = (int)max_partition_order - 1;
+	ALIGN 16
+.loop2:					; for(; partition_order >= (int)min_partition_order; partition_order--) {
+	cmp	ecx, [esp + 44]
+	jl	.next2			;   signed compare: partition_order is -1 when max_partition_order is 0
+	mov	edx, 1
+	shl	edx, cl			;   const unsigned partitions = 1u << partition_order;
+	ALIGN 16
+.loop3:					;   for(i = 0; i < partitions; i++) {
+	mov	eax, [esi]		;     ebx:eax <- a_r_p_s[from_partition] (64-bit)
+	mov	ebx, [esi + 4]
+	add	eax, [esi + 8]		;     add the low dword of a_r_p_s[from_partition+1]
+	adc	ebx, [esi + 12]		;     add the high dword plus the carry
+	mov	[edi], eax
+	mov	[edi + 4], ebx		;     a_r_p_s[to_partition] = a_r_p_s[from_partition] + a_r_p_s[from_partition+1];
+	add	esi, byte 16		;     from_partition += 2
+	add	edi, byte 8		;     to_partition++
+	sub	edx, byte 1
+	jnz	.loop3			;   }
+	sub	ecx, byte 1
+	jmp	.loop2			; }
+.next2:
+
+	add	esp, 8			; release the two dword locals
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+
+end
+
+%ifdef OBJ_FORMAT_elf
+	section .note.GNU-stack noalloc
+%endif
diff --git a/FLAC/lpc.c b/FLAC/lpc.c
index 38e0d00d0e..24ddd552b9 100644
--- a/FLAC/lpc.c
+++ b/FLAC/lpc.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,6 +29,10 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include <math.h>
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
@@ -45,6 +49,17 @@
 #define M_LN2 0.69314718055994530942
 #endif
 
+/* OPT: #undef'ing this may improve the speed on some architectures */
+#define FLAC__LPC_UNROLLED_FILTER_LOOPS
+
+
+void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len)
+{
+	const FLAC__int32 *const in_end = in + data_len; /* one past the last input sample */
+	while(in < in_end) /* out[k] = in[k] * window[k], walking front to back */
+		*out++ = *in++ * *window++;
+}
+
 void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
 {
 	/* a readable, but slower, version */
@@ -55,6 +70,13 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 	FLAC__ASSERT(lag > 0);
 	FLAC__ASSERT(lag <= data_len);
 
+	/*
+	 * Technically we should subtract the mean first like so:
+	 *   for(i = 0; i < data_len; i++)
+	 *     data[i] -= mean;
+	 * but it appears not to make enough of a difference to matter, and
+	 * most signals are already closely centered around zero
+	 */
 	while(lag--) {
 		for(i = lag, d = 0.0; i < data_len; i++)
 			d += data[i] * data[i - lag];
@@ -87,18 +109,19 @@ void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_le
 	}
 }
 
-void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[])
+void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[])
 {
 	unsigned i, j;
 	FLAC__double r, err, ref[FLAC__MAX_LPC_ORDER], lpc[FLAC__MAX_LPC_ORDER];
 
-	FLAC__ASSERT(0 < max_order);
-	FLAC__ASSERT(max_order <= FLAC__MAX_LPC_ORDER);
+	FLAC__ASSERT(0 != max_order);
+	FLAC__ASSERT(0 < *max_order);
+	FLAC__ASSERT(*max_order <= FLAC__MAX_LPC_ORDER);
 	FLAC__ASSERT(autoc[0] != 0.0);
 
 	err = autoc[0];
 
-	for(i = 0; i < max_order; i++) {
+	for(i = 0; i < *max_order; i++) {
 		/* Sum up this iteration's reflection coefficient. */
 		r = -autoc[i+1];
 		for(j = 0; j < i; j++)
@@ -121,16 +144,20 @@ void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned max_or
 		for(j = 0; j <= i; j++)
 			lp_coeff[i][j] = (FLAC__real)(-lpc[j]); /* negate FIR filter coeff to get predictor coeff */
 		error[i] = err;
+
+		/* see SF bug #1601812 http://sourceforge.net/tracker/index.php?func=detail&aid=1601812&group_id=13478&atid=113478 */
+		if(err == 0.0) {
+			*max_order = i+1;
+			return;
+		}
 	}
 }
 
 int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift)
 {
 	unsigned i;
-	FLAC__double d, cmax = -1e32;
+	FLAC__double cmax;
 	FLAC__int32 qmax, qmin;
-	const int max_shiftlimit = (1 << (FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN-1)) - 1;
-	const int min_shiftlimit = -max_shiftlimit - 1;
 
 	FLAC__ASSERT(precision > 0);
 	FLAC__ASSERT(precision >= FLAC__MIN_QLP_COEFF_PRECISION);
@@ -141,77 +168,104 @@ int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order,
 	qmin = -qmax;
 	qmax--;
 
+	/* calc cmax = max( |lp_coeff[i]| ) */
+	cmax = 0.0;
 	for(i = 0; i < order; i++) {
-		if(lp_coeff[i] == 0.0)
-			continue;
-		d = fabs(lp_coeff[i]);
+		const FLAC__double d = fabs(lp_coeff[i]);
 		if(d > cmax)
 			cmax = d;
 	}
-redo_it:
+
 	if(cmax <= 0.0) {
 		/* => coefficients are all 0, which means our constant-detect didn't work */
 		return 2;
 	}
 	else {
+		const int max_shiftlimit = (1 << (FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN-1)) - 1;
+		const int min_shiftlimit = -max_shiftlimit - 1;
 		int log2cmax;
 
 		(void)frexp(cmax, &log2cmax);
 		log2cmax--;
 		*shift = (int)precision - log2cmax - 1;
 
-		if(*shift < min_shiftlimit || *shift > max_shiftlimit) {
-#if 0
-			/*@@@ this does not seem to help at all, but was not extensively tested either: */
-			if(*shift > max_shiftlimit)
-				*shift = max_shiftlimit;
-			else
-#endif
-				return 1;
-		}
+		if(*shift > max_shiftlimit)
+			*shift = max_shiftlimit;
+		else if(*shift < min_shiftlimit)
+			return 1;
 	}
 
 	if(*shift >= 0) {
+		FLAC__double error = 0.0;
+		FLAC__int32 q;
 		for(i = 0; i < order; i++) {
-			qlp_coeff[i] = (FLAC__int32)floor((FLAC__double)lp_coeff[i] * (FLAC__double)(1 << *shift));
-
-			/* double-check the result */
-			if(qlp_coeff[i] > qmax || qlp_coeff[i] < qmin) {
-#ifdef FLAC__OVERFLOW_DETECT
-				fprintf(stderr,"FLAC__lpc_quantize_coefficients: compensating for overflow, qlp_coeff[%u]=%d, lp_coeff[%u]=%f, cmax=%f, precision=%u, shift=%d, q=%f, f(q)=%f\n", i, qlp_coeff[i], i, lp_coeff[i], cmax, precision, *shift, (FLAC__double)lp_coeff[i] * (FLAC__double)(1 << *shift), floor((FLAC__double)lp_coeff[i] * (FLAC__double)(1 << *shift)));
+			error += lp_coeff[i] * (1 << *shift);
+#if 1 /* unfortunately lround() is C99 */
+			if(error >= 0.0)
+				q = (FLAC__int32)(error + 0.5);
+			else
+				q = (FLAC__int32)(error - 0.5);
+#else
+			q = lround(error);
 #endif
-				cmax *= 2.0;
-				goto redo_it;
-			}
+#ifdef FLAC__OVERFLOW_DETECT
+			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
+				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
+			else if(q < qmin)
+				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
+#endif
+			if(q > qmax)
+				q = qmax;
+			else if(q < qmin)
+				q = qmin;
+			error -= q;
+			qlp_coeff[i] = q;
 		}
 	}
-	else { /* (*shift < 0) */
+	/* negative shift is very rare but due to design flaw, negative shift is
+	 * a NOP in the decoder, so it must be handled specially by scaling down
+	 * coeffs
+	 */
+	else {
 		const int nshift = -(*shift);
+		FLAC__double error = 0.0;
+		FLAC__int32 q;
 #ifdef DEBUG
-		fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift = %d\n", *shift);
+		fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift=%d order=%u cmax=%f\n", *shift, order, cmax);
 #endif
 		for(i = 0; i < order; i++) {
-			qlp_coeff[i] = (FLAC__int32)floor((FLAC__double)lp_coeff[i] / (FLAC__double)(1 << nshift));
-
-			/* double-check the result */
-			if(qlp_coeff[i] > qmax || qlp_coeff[i] < qmin) {
-#ifdef FLAC__OVERFLOW_DETECT
-				fprintf(stderr,"FLAC__lpc_quantize_coefficients: compensating for overflow, qlp_coeff[%u]=%d, lp_coeff[%u]=%f, cmax=%f, precision=%u, shift=%d, q=%f, f(q)=%f\n", i, qlp_coeff[i], i, lp_coeff[i], cmax, precision, *shift, (FLAC__double)lp_coeff[i] / (FLAC__double)(1 << nshift), floor((FLAC__double)lp_coeff[i] / (FLAC__double)(1 << nshift)));
+			error += lp_coeff[i] / (1 << nshift);
+#if 1 /* unfortunately lround() is C99 */
+			if(error >= 0.0)
+				q = (FLAC__int32)(error + 0.5);
+			else
+				q = (FLAC__int32)(error - 0.5);
+#else
+			q = lround(error);
 #endif
-				cmax *= 2.0;
-				goto redo_it;
-			}
+#ifdef FLAC__OVERFLOW_DETECT
+			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
+				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
+			else if(q < qmin)
+				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
+#endif
+			if(q > qmax)
+				q = qmax;
+			else if(q < qmin)
+				q = qmin;
+			error -= q;
+			qlp_coeff[i] = q;
 		}
+		*shift = 0;
 	}
 
 	return 0;
 }
 
 void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
-#ifdef FLAC__OVERFLOW_DETECT
 	FLAC__int64 sumo;
-#endif
 	unsigned i, j;
 	FLAC__int32 sum;
 	const FLAC__int32 *history;
@@ -225,22 +279,18 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
 	FLAC__ASSERT(order > 0);
 
 	for(i = 0; i < data_len; i++) {
-#ifdef FLAC__OVERFLOW_DETECT
 		sumo = 0;
-#endif
 		sum = 0;
 		history = data;
 		for(j = 0; j < order; j++) {
 			sum += qlp_coeff[j] * (*(--history));
-#ifdef FLAC__OVERFLOW_DETECT
 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
 #if defined _MSC_VER
 			if(sumo > 2147483647I64 || sumo < -2147483648I64)
 				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%I64d\n",i,j,qlp_coeff[j],*history,sumo);
 #else
 			if(sumo > 2147483647ll || sumo < -2147483648ll)
-				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,sumo);
-#endif
+				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,(long long)sumo);
 #endif
 		}
 		*(residual++) = *(data++) - (sum >> lp_quantization);
@@ -255,8 +305,231 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, u
 	}
 	*/
 }
+#else /* fully unrolled version for normal use */
+{
+	int i;
+	FLAC__int32 sum;
+
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32);
+
+	/*
+	 * We do unique versions up to 12th order since that's the subset limit.
+	 * Also they are roughly ordered to match frequency of occurrence to
+	 * minimize branching.
+	 */
+	if(order <= 12) {
+		if(order > 8) {
+			if(order > 10) {
+				if(order == 12) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[11] * data[i-12];
+						sum += qlp_coeff[10] * data[i-11];
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 11 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[10] * data[i-11];
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 10) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 9 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else if(order > 4) {
+			if(order > 6) {
+				if(order == 8) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 7 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 6) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 5 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else {
+			if(order > 2) {
+				if(order == 4) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 3 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 2) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						residual[i] = data[i] - (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 1 */
+					for(i = 0; i < (int)data_len; i++)
+						residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
+				}
+			}
+		}
+	}
+	else { /* order > 12 */
+		for(i = 0; i < (int)data_len; i++) {
+			sum = 0;
+			switch(order) {
+				case 32: sum += qlp_coeff[31] * data[i-32];
+				case 31: sum += qlp_coeff[30] * data[i-31];
+				case 30: sum += qlp_coeff[29] * data[i-30];
+				case 29: sum += qlp_coeff[28] * data[i-29];
+				case 28: sum += qlp_coeff[27] * data[i-28];
+				case 27: sum += qlp_coeff[26] * data[i-27];
+				case 26: sum += qlp_coeff[25] * data[i-26];
+				case 25: sum += qlp_coeff[24] * data[i-25];
+				case 24: sum += qlp_coeff[23] * data[i-24];
+				case 23: sum += qlp_coeff[22] * data[i-23];
+				case 22: sum += qlp_coeff[21] * data[i-22];
+				case 21: sum += qlp_coeff[20] * data[i-21];
+				case 20: sum += qlp_coeff[19] * data[i-20];
+				case 19: sum += qlp_coeff[18] * data[i-19];
+				case 18: sum += qlp_coeff[17] * data[i-18];
+				case 17: sum += qlp_coeff[16] * data[i-17];
+				case 16: sum += qlp_coeff[15] * data[i-16];
+				case 15: sum += qlp_coeff[14] * data[i-15];
+				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 13: sum += qlp_coeff[12] * data[i-13];
+				         sum += qlp_coeff[11] * data[i-12];
+				         sum += qlp_coeff[10] * data[i-11];
+				         sum += qlp_coeff[ 9] * data[i-10];
+				         sum += qlp_coeff[ 8] * data[i- 9];
+				         sum += qlp_coeff[ 7] * data[i- 8];
+				         sum += qlp_coeff[ 6] * data[i- 7];
+				         sum += qlp_coeff[ 5] * data[i- 6];
+				         sum += qlp_coeff[ 4] * data[i- 5];
+				         sum += qlp_coeff[ 3] * data[i- 4];
+				         sum += qlp_coeff[ 2] * data[i- 3];
+				         sum += qlp_coeff[ 1] * data[i- 2];
+				         sum += qlp_coeff[ 0] * data[i- 1];
+			}
+			residual[i] = data[i] - (sum >> lp_quantization);
+		}
+	}
+}
+#endif
 
 void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	unsigned i, j;
 	FLAC__int64 sum;
@@ -275,30 +548,257 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *da
 		history = data;
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
-#ifdef FLAC__OVERFLOW_DETECT
 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%lld\n", i, sum >> lp_quantization);
+#if defined _MSC_VER
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%I64d\n", i, sum >> lp_quantization);
+#else
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%lld\n", i, (long long)(sum >> lp_quantization));
+#endif
 			break;
 		}
 		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%lld, residual=%lld\n", i, *data, sum >> lp_quantization, (FLAC__int64)(*data) - (sum >> lp_quantization));
+#if defined _MSC_VER
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%I64d, residual=%I64d\n", i, *data, sum >> lp_quantization, (FLAC__int64)(*data) - (sum >> lp_quantization));
+#else
+			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%lld, residual=%lld\n", i, *data, (long long)(sum >> lp_quantization), (long long)((FLAC__int64)(*data) - (sum >> lp_quantization)));
+#endif
 			break;
 		}
-#endif
 		*(residual++) = *(data++) - (FLAC__int32)(sum >> lp_quantization);
 	}
 }
+#else /* fully unrolled version for normal use: compiled when FLAC__LPC_UNROLLED_FILTER_LOOPS is defined and FLAC__OVERFLOW_DETECT is not */
+{
+	int i; /* signed so that data[i-k] can index the samples stored before data[0] */
+	FLAC__int64 sum; /* 64-bit accumulator for the weighted sum of previous samples */
+
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32); /* the switch at the end has no case above 32 */
+
+	/* residual[i] = data[i] - (FLAC__int32)(P >> lp_quantization), with P = sum over k = 1..order of qlp_coeff[k-1] * data[i-k], accumulated in 64 bits.
+	 * We do unique versions up to 12th order since that's the subset limit.
+	 * Also they are roughly ordered to match frequency of occurrence to
+	 * minimize branching.
+	 */
+	if(order <= 12) {
+		if(order > 8) {
+			if(order > 10) {
+				if(order == 12) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
+						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization); /* shift the 64-bit prediction, truncate it to 32 bits, then subtract it from the sample */
+					}
+				}
+				else { /* order == 11 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 10) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 9 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else if(order > 4) {
+			if(order > 6) {
+				if(order == 8) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 7 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 6) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 5 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else {
+			if(order > 2) {
+				if(order == 4) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 3 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 2) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 1 */
+					for(i = 0; i < (int)data_len; i++)
+						residual[i] = data[i] - (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
+				}
+			}
+		}
+	}
+	else { /* order > 12 */
+		for(i = 0; i < (int)data_len; i++) {
+			sum = 0;
+			switch(order) { /* no breaks: entering at 'case order' falls through every lower tap, so exactly 'order' terms are added */
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
+				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
+				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
+				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
+				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
+				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
+				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
+				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
+				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
+				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
+				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
+				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
+			}
+			residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+		}
+	}
+}
+#endif
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
 void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
-#ifdef FLAC__OVERFLOW_DETECT
 	FLAC__int64 sumo;
-#endif
 	unsigned i, j;
 	FLAC__int32 sum;
-	const FLAC__int32 *history;
+	const FLAC__int32 *r = residual, *history;
 
 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
 	fprintf(stderr,"FLAC__lpc_restore_signal: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
@@ -309,25 +809,21 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
 	FLAC__ASSERT(order > 0);
 
 	for(i = 0; i < data_len; i++) {
-#ifdef FLAC__OVERFLOW_DETECT
 		sumo = 0;
-#endif
 		sum = 0;
 		history = data;
 		for(j = 0; j < order; j++) {
 			sum += qlp_coeff[j] * (*(--history));
-#ifdef FLAC__OVERFLOW_DETECT
 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
 #if defined _MSC_VER
 			if(sumo > 2147483647I64 || sumo < -2147483648I64)
 				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%I64d\n",i,j,qlp_coeff[j],*history,sumo);
 #else
 			if(sumo > 2147483647ll || sumo < -2147483648ll)
-				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,sumo);
-#endif
+				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%lld\n",i,j,qlp_coeff[j],*history,(long long)sumo);
 #endif
 		}
-		*(data++) = *(residual++) + (sum >> lp_quantization);
+		*(data++) = *(r++) + (sum >> lp_quantization);
 	}
 
 	/* Here's a slower but clearer version:
@@ -339,12 +835,235 @@ void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, c
 	}
 	*/
 }
+#else /* fully unrolled version for normal use: compiled when FLAC__LPC_UNROLLED_FILTER_LOOPS is defined and FLAC__OVERFLOW_DETECT is not */
+{
+	int i; /* signed so that data[i-k] can index the samples stored before data[0] */
+	FLAC__int32 sum; /* 32-bit accumulator; FLAC__lpc_restore_signal_wide is the 64-bit counterpart */
+
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32); /* the switch at the end has no case above 32 */
+
+	/* data[i] = residual[i] + (P >> lp_quantization), with P = sum over k = 1..order of qlp_coeff[k-1] * data[i-k], accumulated in 32 bits.
+	 * We do unique versions up to 12th order since that's the subset limit.
+	 * Also they are roughly ordered to match frequency of occurrence to
+	 * minimize branching.
+	 */
+	if(order <= 12) {
+		if(order > 8) {
+			if(order > 10) {
+				if(order == 12) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[11] * data[i-12];
+						sum += qlp_coeff[10] * data[i-11];
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization); /* the restored sample is read back as data[i-1] etc. by the following iterations */
+					}
+				}
+				else { /* order == 11 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[10] * data[i-11];
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 10) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[9] * data[i-10];
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 9 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[8] * data[i-9];
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else if(order > 4) {
+			if(order > 6) {
+				if(order == 8) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[7] * data[i-8];
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 7 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[6] * data[i-7];
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 6) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[5] * data[i-6];
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 5 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[4] * data[i-5];
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else {
+			if(order > 2) {
+				if(order == 4) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[3] * data[i-4];
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 3 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[2] * data[i-3];
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 2) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[1] * data[i-2];
+						sum += qlp_coeff[0] * data[i-1];
+						data[i] = residual[i] + (sum >> lp_quantization);
+					}
+				}
+				else { /* order == 1 */
+					for(i = 0; i < (int)data_len; i++)
+						data[i] = residual[i] + ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
+				}
+			}
+		}
+	}
+	else { /* order > 12 */
+		for(i = 0; i < (int)data_len; i++) {
+			sum = 0;
+			switch(order) { /* no breaks: entering at 'case order' falls through every lower tap, so exactly 'order' terms are added */
+				case 32: sum += qlp_coeff[31] * data[i-32];
+				case 31: sum += qlp_coeff[30] * data[i-31];
+				case 30: sum += qlp_coeff[29] * data[i-30];
+				case 29: sum += qlp_coeff[28] * data[i-29];
+				case 28: sum += qlp_coeff[27] * data[i-28];
+				case 27: sum += qlp_coeff[26] * data[i-27];
+				case 26: sum += qlp_coeff[25] * data[i-26];
+				case 25: sum += qlp_coeff[24] * data[i-25];
+				case 24: sum += qlp_coeff[23] * data[i-24];
+				case 23: sum += qlp_coeff[22] * data[i-23];
+				case 22: sum += qlp_coeff[21] * data[i-22];
+				case 21: sum += qlp_coeff[20] * data[i-21];
+				case 20: sum += qlp_coeff[19] * data[i-20];
+				case 19: sum += qlp_coeff[18] * data[i-19];
+				case 18: sum += qlp_coeff[17] * data[i-18];
+				case 17: sum += qlp_coeff[16] * data[i-17];
+				case 16: sum += qlp_coeff[15] * data[i-16];
+				case 15: sum += qlp_coeff[14] * data[i-15];
+				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 13: sum += qlp_coeff[12] * data[i-13];
+				         sum += qlp_coeff[11] * data[i-12];
+				         sum += qlp_coeff[10] * data[i-11];
+				         sum += qlp_coeff[ 9] * data[i-10];
+				         sum += qlp_coeff[ 8] * data[i- 9];
+				         sum += qlp_coeff[ 7] * data[i- 8];
+				         sum += qlp_coeff[ 6] * data[i- 7];
+				         sum += qlp_coeff[ 5] * data[i- 6];
+				         sum += qlp_coeff[ 4] * data[i- 5];
+				         sum += qlp_coeff[ 3] * data[i- 4];
+				         sum += qlp_coeff[ 2] * data[i- 3];
+				         sum += qlp_coeff[ 1] * data[i- 2];
+				         sum += qlp_coeff[ 0] * data[i- 1];
+			}
+			data[i] = residual[i] + (sum >> lp_quantization);
+		}
+	}
+}
+#endif
 
 void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
+#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	unsigned i, j;
 	FLAC__int64 sum;
-	const FLAC__int32 *history;
+	const FLAC__int32 *r = residual, *history;
 
 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
 	fprintf(stderr,"FLAC__lpc_restore_signal_wide: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
@@ -359,19 +1078,247 @@ void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_l
 		history = data;
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
-#ifdef FLAC__OVERFLOW_DETECT
 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%lld\n", i, sum >> lp_quantization);
-			break;
-		}
-		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*residual) + (sum >> lp_quantization)) > 32) {
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%lld, data=%lld\n", i, *residual, sum >> lp_quantization, (FLAC__int64)(*residual) + (sum >> lp_quantization));
-			break;
-		}
+#ifdef _MSC_VER
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%I64d\n", i, sum >> lp_quantization);
+#else
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%lld\n", i, (long long)(sum >> lp_quantization));
 #endif
-		*(data++) = *(residual++) + (FLAC__int32)(sum >> lp_quantization);
+			break;
+		}
+		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
+#ifdef _MSC_VER
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%I64d, data=%I64d\n", i, *r, sum >> lp_quantization, (FLAC__int64)(*r) + (sum >> lp_quantization));
+#else
+			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%lld, data=%lld\n", i, *r, (long long)(sum >> lp_quantization), (long long)((FLAC__int64)(*r) + (sum >> lp_quantization)));
+#endif
+			break;
+		}
+		*(data++) = *(r++) + (FLAC__int32)(sum >> lp_quantization);
 	}
 }
+#else /* fully unrolled version for normal use: compiled when FLAC__LPC_UNROLLED_FILTER_LOOPS is defined and FLAC__OVERFLOW_DETECT is not */
+{
+	int i; /* signed so that data[i-k] can index the samples stored before data[0] */
+	FLAC__int64 sum; /* 64-bit accumulator for the weighted sum of previous samples */
+
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32); /* the switch at the end has no case above 32 */
+
+	/* data[i] = residual[i] + (FLAC__int32)(P >> lp_quantization), with P = sum over k = 1..order of qlp_coeff[k-1] * data[i-k], accumulated in 64 bits.
+	 * We do unique versions up to 12th order since that's the subset limit.
+	 * Also they are roughly ordered to match frequency of occurrence to
+	 * minimize branching.
+	 */
+	if(order <= 12) {
+		if(order > 8) {
+			if(order > 10) {
+				if(order == 12) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
+						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization); /* the restored sample is read back as data[i-1] etc. by the following iterations */
+					}
+				}
+				else { /* order == 11 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 10) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 9 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else if(order > 4) {
+			if(order > 6) {
+				if(order == 8) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 7 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 6) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 5 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+		}
+		else {
+			if(order > 2) {
+				if(order == 4) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 3 */
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+			}
+			else {
+				if(order == 2) {
+					for(i = 0; i < (int)data_len; i++) {
+						sum = 0;
+						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
+						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
+						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+					}
+				}
+				else { /* order == 1 */
+					for(i = 0; i < (int)data_len; i++)
+						data[i] = residual[i] + (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
+				}
+			}
+		}
+	}
+	else { /* order > 12 */
+		for(i = 0; i < (int)data_len; i++) {
+			sum = 0;
+			switch(order) { /* no breaks: entering at 'case order' falls through every lower tap, so exactly 'order' terms are added */
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
+				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
+				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
+				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
+				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
+				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
+				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
+				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
+				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
+				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
+				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
+				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
+				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
+			}
+			data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+		}
+	}
+}
+#endif
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
@@ -403,28 +1350,28 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scal
 	}
 }
 
-unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned bits_per_signal_sample)
+unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order)
 {
-	unsigned order, best_order;
-	FLAC__double best_bits, tmp_bits, error_scale;
+	unsigned order, index, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
+	FLAC__double bits, best_bits, error_scale;
 
 	FLAC__ASSERT(max_order > 0);
 	FLAC__ASSERT(total_samples > 0);
 
 	error_scale = 0.5 * M_LN2 * M_LN2 / (FLAC__double)total_samples;
 
-	best_order = 0;
-	best_bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[0], error_scale) * (FLAC__double)total_samples;
+	best_index = 0;
+	best_bits = (unsigned)(-1); /* sentinel: the largest unsigned value, converted to FLAC__double; any smaller estimate replaces it */
 
-	for(order = 1; order < max_order; order++) {
-		tmp_bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[order], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * bits_per_signal_sample);
-		if(tmp_bits < best_bits) {
-			best_order = order;
-			best_bits = tmp_bits;
+	for(index = 0, order = 1; index < max_order; index++, order++) { /* score every order from 1 to max_order */
+		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[index], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order);
+		if(bits < best_bits) { /* strict '<': on a tie the lower order is kept */
+			best_index = index;
+			best_bits = bits;
 		}
 	}
 
-	return best_order+1; /* +1 since index of lpc_error[] is order-1 */
+	return best_index+1; /* +1 since index of lpc_error[] is order-1 */
 }
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
diff --git a/FLAC/md5.c b/FLAC/md5.c
new file mode 100644
index 0000000000..f1558e4449
--- /dev/null
+++ b/FLAC/md5.c
@@ -0,0 +1,426 @@
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdlib.h>		/* for malloc() */
+#include <string.h>		/* for memcpy() */
+
+#include "private/md5.h"
+#include "share/alloc.h"
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#ifndef FLaC__INLINE
+#define FLaC__INLINE
+#endif
+
+/*
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h' header
+ * definitions; now uses stuff from dpkg's config.h.
+ *  - Ian Jackson <ijackson@nyx.cs.du.edu>.
+ * Still in the public domain.
+ *
+ * Josh Coalson: made some changes to integrate with libFLAC.
+ * Still in the public domain.
+ */
+
+/* The four core functions - F1 is optimized somewhat.  Arguments are not parenthesized, so pass only simple operands. */
+
+/* #define F1(x, y, z) (x & y | ~x & z)   -- the plain form that the F1 below is equivalent to */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))	/* bitwise select: y where x is set, z elsewhere */
+#define F2(x, y, z) F1(z, x, y)	/* the same select keyed on z: x where z is set, y elsewhere */
+#define F3(x, y, z) (x ^ y ^ z)	/* bitwise parity of the three words */
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm: add f(x,y,z) + in to w, rotate w left by s bits (written for a 32-bit w), then add x. */
+#define MD5STEP(f,w,x,y,z,in,s) \
+	 (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
+
+/*
+ * The core of the MD5 algorithm: mixes one block of 16 32-bit words (in)
+ * into the four-word running hash state (buf), updating buf in place.
+ * FLAC__MD5Update blocks the data and byte-swaps it for this routine.
+ */
+static void FLAC__MD5Transform(FLAC__uint32 buf[4], FLAC__uint32 const in[16])
+{
+	register FLAC__uint32 a, b, c, d;
+	/* Work on local copies of the four state words. */
+	a = buf[0];
+	b = buf[1];
+	c = buf[2];
+	d = buf[3];
+	/* Round 1: 16 steps with F1, message words taken in order 0..15. */
+	MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+	MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+	MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+	MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+	MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+	MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+	MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+	MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+	MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+	MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+	MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+	MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+	MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+	MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+	MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+	MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+	/* Round 2: 16 steps with F2, message words in a permuted order. */
+	MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+	MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+	MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+	MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+	MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+	MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+	MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+	MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+	MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+	MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+	MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+	MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+	MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+	MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+	MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+	MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+	/* Round 3: 16 steps with F3. */
+	MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+	MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+	MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+	MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+	MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+	MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+	MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+	MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+	MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+	MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+	MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+	MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+	MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+	MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+	MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+	MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+	/* Round 4: 16 steps with F4. */
+	MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+	MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+	MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+	MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+	MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+	MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+	MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+	MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+	MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+	MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+	MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+	MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+	MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+	MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+	MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+	MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+	/* Add the mixed words back into the caller's state. */
+	buf[0] += a;
+	buf[1] += b;
+	buf[2] += c;
+	buf[3] += d;
+}
+
+#if WORDS_BIGENDIAN
+//@@@@@@ OPT: use bswap/intrinsics
+static void byteSwap(FLAC__uint32 *buf, unsigned words)
+{
+	/* Reverse the byte order of 32-bit values in place; the body runs before the count is tested, so pass words >= 1. */
+	do {
+		FLAC__uint32 v = *buf;
+		v = (v << 16) | (v >> 16);	/* exchange the two 16-bit halves */
+		*buf++ = ((v >> 8) & 0x00ff00ff) | ((v << 8) & 0xff00ff00);	/* exchange the bytes inside each half */
+	} while (--words);
+}
+static void byteSwapX16(FLAC__uint32 *buf)
+{
+	register FLAC__uint32 x;
+	/* Fully unrolled: byte-reverse 16 consecutive 32-bit words in place; only the last line leaves buf unadvanced. */
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf++ = (x >> 16) | (x << 16);
+	x = *buf; x = ((x << 8) & 0xff00ff00) | ((x >> 8) & 0x00ff00ff); *buf   = (x >> 16) | (x << 16);
+}
+#else
+#define byteSwap(buf, words)
+#define byteSwapX16(buf)
+#endif
+
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes.
+ */
+static void FLAC__MD5Update(FLAC__MD5Context *ctx, FLAC__byte const *buf, unsigned len)
+{
+	/* Snapshot the low count word: it yields both the carry test and the fill level. */
+	const FLAC__uint32 old_low = ctx->bytes[0];
+	const FLAC__uint32 used = old_low & 0x3f;	/* bytes already pending in ctx->in */
+	const FLAC__uint32 room = 64 - used;	/* free space left in ctx->in, 1..64 */
+	FLAC__byte *const pending = (FLAC__byte *)ctx->in + used;
+
+	/* Advance the byte counter, which is kept as two 32-bit words. */
+	ctx->bytes[0] = old_low + len;
+	if (ctx->bytes[0] < old_low)
+		ctx->bytes[1]++;	/* low word wrapped: carry into the high word */
+
+	/* Too little input to complete a block: stash it and leave. */
+	if (len < room) {
+		memcpy(pending, buf, len);
+		return;
+	}
+
+	/* Top up the partially filled block and hash it. */
+	memcpy(pending, buf, room);
+	byteSwapX16(ctx->in);
+	FLAC__MD5Transform(ctx->buf, ctx->in);
+
+	/* Hash every further complete 64-byte block of the input. */
+	for (buf += room, len -= room; len >= 64; buf += 64, len -= 64) {
+		memcpy(ctx->in, buf, 64);
+		byteSwapX16(ctx->in);
+		FLAC__MD5Transform(ctx->buf, ctx->in);
+	}
+
+	/* Keep the leftover tail (fewer than 64 bytes) for the next call. */
+	memcpy(ctx->in, buf, len);
+}
+
+/*
+ * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
+ * initialization constants.
+ */
+void FLAC__MD5Init(FLAC__MD5Context *ctx)
+{
+	/* No scratch buffer is held yet. */
+	ctx->capacity = 0;
+	ctx->internal_buf = 0;
+	/* Nothing hashed so far: clear both words of the byte counter. */
+	ctx->bytes[0] = ctx->bytes[1] = 0;
+	/* Initial values of the four MD5 state words. */
+	ctx->buf[3] = 0x10325476;
+	ctx->buf[2] = 0x98badcfe;
+	ctx->buf[1] = 0xefcdab89;
+	ctx->buf[0] = 0x67452301;
+}
+
+/*
+ * Final wrapup - pad with 0x80, zeros and the 64-bit count of bits processed,
+ * write the 16-byte result to digest, then free the scratch buffer and wipe *ctx.
+ */
+void FLAC__MD5Final(FLAC__byte digest[16], FLAC__MD5Context *ctx)
+{
+	int count = ctx->bytes[0] & 0x3f;	/* Number of bytes in ctx->in */
+	FLAC__byte *p = (FLAC__byte *)ctx->in + count;
+
+	/* Set the first char of padding to 0x80.  There is always room. */
+	*p++ = 0x80;
+
+	/* Bytes of padding needed to make 56 bytes (-8..55) */
+	count = 56 - 1 - count;
+
+	if (count < 0) {	/* Padding forces an extra block */
+		memset(p, 0, count + 8);
+		byteSwapX16(ctx->in);
+		FLAC__MD5Transform(ctx->buf, ctx->in);
+		p = (FLAC__byte *)ctx->in;
+		count = 56;
+	}
+	memset(p, 0, count);
+	byteSwap(ctx->in, 14);
+
+	/* Append length in bits and transform */
+	ctx->in[14] = ctx->bytes[0] << 3;
+	ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
+	FLAC__MD5Transform(ctx->buf, ctx->in);
+
+	byteSwap(ctx->buf, 4);
+	memcpy(digest, ctx->buf, 16);
+	/* Free the scratch buffer BEFORE wiping: the wipe zeroes the pointer too,
+	 * so freeing afterwards would leak the buffer. */
+	free(ctx->internal_buf);	/* free() of a null pointer is a no-op */
+	/* sizeof(*ctx), not sizeof(ctx): the latter is only the size of a pointer.
+	 * The full wipe also leaves internal_buf and capacity zeroed. */
+	memset(ctx, 0, sizeof(*ctx));	/* In case it's sensitive */
+}
+
+/*
+ * Convert the incoming audio signal to a byte stream: samples are interleaved by channel into buf, each written low byte first
+ */
+static void format_input_(FLAC__byte *buf, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
+{
+	unsigned channel, sample;
+	register FLAC__int32 a_word;
+	register FLAC__byte *buf_ = buf;
+	/* When WORDS_BIGENDIAN is not set, two 16-bit fast paths store whole FLAC__int16 values instead of single bytes. */
+#if WORDS_BIGENDIAN
+#else
+	if(channels == 2 && bytes_per_sample == 2) {
+		FLAC__int16 *buf1_ = ((FLAC__int16*)buf_) + 1;
+		memcpy(buf_, signal[0], sizeof(FLAC__int32) * samples); /* bulk-copy channel 0 as 32-bit words; the loop below overwrites the second 16-bit half of each with channel 1 */
+		for(sample = 0; sample < samples; sample++, buf1_+=2)
+			*buf1_ = (FLAC__int16)signal[1][sample];
+	}
+	else if(channels == 1 && bytes_per_sample == 2) {
+		FLAC__int16 *buf1_ = (FLAC__int16*)buf_;
+		for(sample = 0; sample < samples; sample++)
+			*buf1_++ = (FLAC__int16)signal[0][sample];
+	}
+	else
+#endif
+	if(bytes_per_sample == 2) { /* byte-at-a-time paths from here on; 1 and 2 channels are special-cased to skip the inner channel loop */
+		if(channels == 2) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+				a_word = signal[1][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else if(channels == 1) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else {
+			for(sample = 0; sample < samples; sample++) {
+				for(channel = 0; channel < channels; channel++) {
+					a_word = signal[channel][sample];
+					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+					*buf_++ = (FLAC__byte)a_word;
+				}
+			}
+		}
+	}
+	else if(bytes_per_sample == 3) { /* three bytes per sample; the top byte of each FLAC__int32 is dropped */
+		if(channels == 2) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+				a_word = signal[1][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else if(channels == 1) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else {
+			for(sample = 0; sample < samples; sample++) {
+				for(channel = 0; channel < channels; channel++) {
+					a_word = signal[channel][sample];
+					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+					*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+					*buf_++ = (FLAC__byte)a_word;
+				}
+			}
+		}
+	}
+	else if(bytes_per_sample == 1) { /* one byte per sample: only the low byte of each word is kept */
+		if(channels == 2) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word;
+				a_word = signal[1][sample];
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else if(channels == 1) {
+			for(sample = 0; sample < samples; sample++) {
+				a_word = signal[0][sample];
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+		else {
+			for(sample = 0; sample < samples; sample++) {
+				for(channel = 0; channel < channels; channel++) {
+					a_word = signal[channel][sample];
+					*buf_++ = (FLAC__byte)a_word;
+				}
+			}
+		}
+	}
+	else { /* bytes_per_sample == 4, maybe optimize more later; any other value also lands here and is written as 4 bytes */
+		for(sample = 0; sample < samples; sample++) {
+			for(channel = 0; channel < channels; channel++) {
+				a_word = signal[channel][sample];
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word; a_word >>= 8;
+				*buf_++ = (FLAC__byte)a_word;
+			}
+		}
+	}
+}
+
+/*
+ * Convert the incoming audio signal to a byte stream and FLAC__MD5Update it; returns false, hashing nothing, on size overflow or allocation failure.
+ */
+FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
+{
+	const size_t bytes_needed = (size_t)channels * (size_t)samples * (size_t)bytes_per_sample;
+
+	if(0 == channels || 0 == samples || 0 == bytes_per_sample)
+		return true; /* nothing to hash, and the divisions below must not see a zero divisor */
+	/* overflow check: bytes_needed above is only meaningful if the product fits in size_t */
+	if((size_t)channels > SIZE_MAX / (size_t)bytes_per_sample)
+		return false;
+	if((size_t)channels * (size_t)bytes_per_sample > SIZE_MAX / (size_t)samples)
+		return false;
+
+	if(ctx->capacity < bytes_needed) {
+		FLAC__byte *tmp = (FLAC__byte*)realloc(ctx->internal_buf, bytes_needed);
+		if(0 == tmp) { /* the old contents are disposable, so retry with a fresh block */
+			free(ctx->internal_buf);
+			tmp = (FLAC__byte*)safe_malloc_(bytes_needed);
+		}
+		ctx->internal_buf = tmp; /* whichever allocation succeeded, or 0 if neither did */
+		ctx->capacity = (0 != tmp)? bytes_needed : 0; /* never record space that is not held */
+		if(0 == tmp)
+			return false;
+	}
+	format_input_(ctx->internal_buf, signal, channels, samples, bytes_per_sample);
+	FLAC__MD5Update(ctx, ctx->internal_buf, (unsigned int)bytes_needed);
+	return true;
+}
diff --git a/FLAC/memory.c b/FLAC/memory.c
index fcdf58ca56..e514e90443 100644
--- a/FLAC/memory.c
+++ b/FLAC/memory.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,12 +29,13 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
 #include "private/memory.h"
 #include "FLAC/assert.h"
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
+#include "share/alloc.h"
 
 void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address)
 {
@@ -44,10 +45,27 @@ void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address)
 
 #ifdef FLAC__ALIGN_MALLOC_DATA
 	/* align on 32-byte (256-bit) boundary */
-	x = malloc(bytes+31);
-	*aligned_address = (void*)(((unsigned)x + 31) & -32);
+	x = safe_malloc_add_2op_(bytes, /*+*/31);
+#ifdef SIZEOF_VOIDP
+#if SIZEOF_VOIDP == 4
+		/* could do  *aligned_address = x + ((unsigned) (32 - (((unsigned)x) & 31))) & 31; */
+		*aligned_address = (void*)(((unsigned)x + 31) & -32);
+#elif SIZEOF_VOIDP == 8
+		*aligned_address = (void*)(((FLAC__uint64)x + 31) & (FLAC__uint64)(-((FLAC__int64)32)));
 #else
-	x = malloc(bytes);
+# error  Unsupported sizeof(void*)
+#endif
+#else
+	/* there's got to be a better way to do this right for all archs */
+	if(sizeof(void*) == sizeof(unsigned))
+		*aligned_address = (void*)(((unsigned)x + 31) & -32);
+	else if(sizeof(void*) == sizeof(FLAC__uint64))
+		*aligned_address = (void*)(((FLAC__uint64)x + 31) & (FLAC__uint64)(-((FLAC__int64)32)));
+	else
+		return 0;
+#endif
+#else
+	x = safe_malloc_(bytes);
 	*aligned_address = x;
 #endif
 	return x;
@@ -66,7 +84,10 @@ FLAC__bool FLAC__memory_alloc_aligned_int32_array(unsigned elements, FLAC__int32
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	pu = (FLAC__int32*)FLAC__memory_alloc_aligned(sizeof(FLAC__int32) * elements, &u.pv);
+	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+		return false;
+
+	pu = (FLAC__int32*)FLAC__memory_alloc_aligned(sizeof(*pu) * (size_t)elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -92,7 +113,10 @@ FLAC__bool FLAC__memory_alloc_aligned_uint32_array(unsigned elements, FLAC__uint
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	pu = (FLAC__uint32*)FLAC__memory_alloc_aligned(sizeof(FLAC__uint32) * elements, &u.pv);
+	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+		return false;
+
+	pu = (FLAC__uint32*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -118,7 +142,10 @@ FLAC__bool FLAC__memory_alloc_aligned_uint64_array(unsigned elements, FLAC__uint
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	pu = (FLAC__uint64*)FLAC__memory_alloc_aligned(sizeof(FLAC__uint64) * elements, &u.pv);
+	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+		return false;
+
+	pu = (FLAC__uint64*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -144,7 +171,10 @@ FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(unsigned elements, unsigned
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	pu = (unsigned*)FLAC__memory_alloc_aligned(sizeof(unsigned) * elements, &u.pv);
+	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+		return false;
+
+	pu = (unsigned*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
@@ -172,7 +202,10 @@ FLAC__bool FLAC__memory_alloc_aligned_real_array(unsigned elements, FLAC__real *
 	FLAC__ASSERT(0 != aligned_pointer);
 	FLAC__ASSERT(unaligned_pointer != aligned_pointer);
 
-	pu = (FLAC__real*)FLAC__memory_alloc_aligned(sizeof(FLAC__real) * elements, &u.pv);
+	if((size_t)elements > SIZE_MAX / sizeof(*pu)) /* overflow check */
+		return false;
+
+	pu = (FLAC__real*)FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv);
 	if(0 == pu) {
 		return false;
 	}
diff --git a/FLAC/private/bitbuffer.h b/FLAC/private/bitbuffer.h
deleted file mode 100644
index 0923df895f..0000000000
--- a/FLAC/private/bitbuffer.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Xiph.org Foundation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FLAC__PRIVATE__BITBUFFER_H
-#define FLAC__PRIVATE__BITBUFFER_H
-
-#include <stdio.h> /* for FILE */
-#include "FLAC/ordinals.h"
-
-/* @@@ This should be configurable. Valid values are currently 8 and 32. */
-/* @@@ WATCHOUT!  do not use 32 with a little endian system yet. */
-#define FLAC__BITS_PER_BLURB 8
-
-#if FLAC__BITS_PER_BLURB == 8
-typedef FLAC__byte FLAC__blurb;
-#elif FLAC__BITS_PER_BLURB == 32
-typedef FLAC__uint32 FLAC__blurb;
-#else
-/* ERROR, only sizes of 8 and 32 are supported */
-#endif
-
-/*
- * opaque structure definition
- */
-struct FLAC__BitBuffer;
-typedef struct FLAC__BitBuffer FLAC__BitBuffer;
-
-/*
- * construction, deletion, initialization, cloning functions
- */
-FLAC__BitBuffer *FLAC__bitbuffer_new();
-void FLAC__bitbuffer_delete(FLAC__BitBuffer *bb);
-FLAC__bool FLAC__bitbuffer_init(FLAC__BitBuffer *bb);
-FLAC__bool FLAC__bitbuffer_init_from(FLAC__BitBuffer *bb, const FLAC__byte buffer[], unsigned bytes);
-FLAC__bool FLAC__bitbuffer_concatenate_aligned(FLAC__BitBuffer *dest, const FLAC__BitBuffer *src);
-void FLAC__bitbuffer_free(FLAC__BitBuffer *bb); /* does not 'free(buffer)' */
-FLAC__bool FLAC__bitbuffer_clear(FLAC__BitBuffer *bb);
-FLAC__bool FLAC__bitbuffer_clone(FLAC__BitBuffer *dest, const FLAC__BitBuffer *src);
-
-/*
- * CRC functions
- */
-void FLAC__bitbuffer_reset_read_crc16(FLAC__BitBuffer *bb, FLAC__uint16 seed);
-FLAC__uint16 FLAC__bitbuffer_get_read_crc16(FLAC__BitBuffer *bb);
-FLAC__uint16 FLAC__bitbuffer_get_write_crc16(const FLAC__BitBuffer *bb);
-FLAC__byte FLAC__bitbuffer_get_write_crc8(const FLAC__BitBuffer *bb);
-
-/*
- * info functions
- */
-FLAC__bool FLAC__bitbuffer_is_byte_aligned(const FLAC__BitBuffer *bb);
-FLAC__bool FLAC__bitbuffer_is_consumed_byte_aligned(const FLAC__BitBuffer *bb);
-unsigned FLAC__bitbuffer_bits_left_for_byte_alignment(const FLAC__BitBuffer *bb);
-unsigned FLAC__bitbuffer_get_input_bytes_unconsumed(const FLAC__BitBuffer *bb); /* do not call unless byte-aligned */
-
-/*
- * direct buffer access
- */
-void FLAC__bitbuffer_get_buffer(FLAC__BitBuffer *bb, const FLAC__byte **buffer, unsigned *bytes);
-void FLAC__bitbuffer_release_buffer(FLAC__BitBuffer *bb);
-
-/*
- * write functions
- */
-FLAC__bool FLAC__bitbuffer_write_zeroes(FLAC__BitBuffer *bb, unsigned bits);
-FLAC__bool FLAC__bitbuffer_write_raw_uint32(FLAC__BitBuffer *bb, FLAC__uint32 val, unsigned bits);
-FLAC__bool FLAC__bitbuffer_write_raw_int32(FLAC__BitBuffer *bb, FLAC__int32 val, unsigned bits);
-FLAC__bool FLAC__bitbuffer_write_raw_uint64(FLAC__BitBuffer *bb, FLAC__uint64 val, unsigned bits);
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_raw_int64(FLAC__BitBuffer *bb, FLAC__int64 val, unsigned bits);
-#endif
-FLAC__bool FLAC__bitbuffer_write_raw_uint32_little_endian(FLAC__BitBuffer *bb, FLAC__uint32 val); /*only for bits=32*/
-FLAC__bool FLAC__bitbuffer_write_byte_block(FLAC__BitBuffer *bb, const FLAC__byte vals[], unsigned nvals);
-FLAC__bool FLAC__bitbuffer_write_unary_unsigned(FLAC__BitBuffer *bb, unsigned val);
-unsigned FLAC__bitbuffer_rice_bits(int val, unsigned parameter);
-#if 0 /* UNUSED */
-unsigned FLAC__bitbuffer_golomb_bits_signed(int val, unsigned parameter);
-unsigned FLAC__bitbuffer_golomb_bits_unsigned(unsigned val, unsigned parameter);
-#endif
-FLAC__bool FLAC__bitbuffer_write_rice_signed(FLAC__BitBuffer *bb, int val, unsigned parameter);
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_rice_signed_guarded(FLAC__BitBuffer *bb, int val, unsigned parameter, unsigned max_bits, FLAC__bool *overflow);
-#endif
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_write_golomb_signed(FLAC__BitBuffer *bb, int val, unsigned parameter);
-FLAC__bool FLAC__bitbuffer_write_golomb_signed_guarded(FLAC__BitBuffer *bb, int val, unsigned parameter, unsigned max_bits, FLAC__bool *overflow);
-FLAC__bool FLAC__bitbuffer_write_golomb_unsigned(FLAC__BitBuffer *bb, unsigned val, unsigned parameter);
-FLAC__bool FLAC__bitbuffer_write_golomb_unsigned_guarded(FLAC__BitBuffer *bb, unsigned val, unsigned parameter, unsigned max_bits, FLAC__bool *overflow);
-#endif
-FLAC__bool FLAC__bitbuffer_write_utf8_uint32(FLAC__BitBuffer *bb, FLAC__uint32 val);
-FLAC__bool FLAC__bitbuffer_write_utf8_uint64(FLAC__BitBuffer *bb, FLAC__uint64 val);
-FLAC__bool FLAC__bitbuffer_zero_pad_to_byte_boundary(FLAC__BitBuffer *bb);
-
-/*
- * read functions
- */
-FLAC__bool FLAC__bitbuffer_peek_bit(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_bit(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_bit_to_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_bit_to_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_raw_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_raw_int32(FLAC__BitBuffer *bb, FLAC__int32 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_raw_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_read_raw_int64(FLAC__BitBuffer *bb, FLAC__int64 *val, const unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-#endif
-FLAC__bool FLAC__bitbuffer_read_raw_uint32_little_endian(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data); /*only for bits=32*/
-FLAC__bool FLAC__bitbuffer_skip_bits_no_crc(FLAC__BitBuffer *bb, unsigned bits, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data); /* WATCHOUT: does not CRC the skipped data! */ /*@@@@ add to unit tests */
-FLAC__bool FLAC__bitbuffer_read_byte_block_aligned_no_crc(FLAC__BitBuffer *bb, FLAC__byte *val, unsigned nvals, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data); /* val may be 0 to skip bytes instead of reading them */ /* WATCHOUT: does not CRC the read data! */
-FLAC__bool FLAC__bitbuffer_read_unary_unsigned(FLAC__BitBuffer *bb, unsigned *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_rice_signed(FLAC__BitBuffer *bb, int *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_rice_signed_block(FLAC__BitBuffer *bb, int vals[], unsigned nvals, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-#if 0 /* UNUSED */
-FLAC__bool FLAC__bitbuffer_read_golomb_signed(FLAC__BitBuffer *bb, int *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-FLAC__bool FLAC__bitbuffer_read_golomb_unsigned(FLAC__BitBuffer *bb, unsigned *val, unsigned parameter, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data);
-#endif
-FLAC__bool FLAC__bitbuffer_read_utf8_uint32(FLAC__BitBuffer *bb, FLAC__uint32 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data, FLAC__byte *raw, unsigned *rawlen);
-FLAC__bool FLAC__bitbuffer_read_utf8_uint64(FLAC__BitBuffer *bb, FLAC__uint64 *val, FLAC__bool (*read_callback)(FLAC__byte buffer[], unsigned *bytes, void *client_data), void *client_data, FLAC__byte *raw, unsigned *rawlen);
-void FLAC__bitbuffer_dump(const FLAC__BitBuffer *bb, FILE *out);
-
-#endif
diff --git a/FLAC/private/bitmath.h b/FLAC/private/bitmath.h
index 980ee29ab3..529f7a5889 100644
--- a/FLAC/private/bitmath.h
+++ b/FLAC/private/bitmath.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/private/bitreader.h b/FLAC/private/bitreader.h
new file mode 100644
index 0000000000..fd0f6aac6c
--- /dev/null
+++ b/FLAC/private/bitreader.h
@@ -0,0 +1,99 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FLAC__PRIVATE__BITREADER_H
+#define FLAC__PRIVATE__BITREADER_H
+
+#include <stdio.h> /* for FILE */
+#include "FLAC/ordinals.h"
+#include "cpu.h"
+
+/*
+ * opaque structure definition
+ */
+struct FLAC__BitReader;
+typedef struct FLAC__BitReader FLAC__BitReader;
+
+typedef FLAC__bool (*FLAC__BitReaderReadCallback)(FLAC__byte buffer[], size_t *bytes, void *client_data);
+
+/*
+ * construction, deletion, initialization, etc functions
+ */
+FLAC__BitReader *FLAC__bitreader_new(void);
+void FLAC__bitreader_delete(FLAC__BitReader *br);
+FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__CPUInfo cpu, FLAC__BitReaderReadCallback rcb, void *cd);
+void FLAC__bitreader_free(FLAC__BitReader *br); /* does not 'free(br)' */
+FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br);
+void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out);
+
+/*
+ * CRC functions
+ */
+void FLAC__bitreader_reset_read_crc16(FLAC__BitReader *br, FLAC__uint16 seed);
+FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br);
+
+/*
+ * info functions
+ */
+FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br);
+unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br);
+unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br);
+
+/*
+ * read functions
+ */
+
+FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits);
+FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, unsigned bits);
+FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, unsigned bits);
+FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val); /*only for bits=32*/
+FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits); /* WATCHOUT: does not CRC the skipped data! */ /*@@@@ add to unit tests */
+FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, unsigned nvals); /* WATCHOUT: does not CRC the read data! */
+FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, unsigned nvals); /* WATCHOUT: does not CRC the read data! */
+FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val);
+FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter);
+FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
+#ifndef FLAC__NO_ASM
+#  ifdef FLAC__CPU_IA32
+#    ifdef FLAC__HAS_NASM
+FLAC__bool FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
+#    endif
+#  endif
+#endif
+#if 0 /* UNUSED */
+FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter);
+FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter);
+#endif
+FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen);
+FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen);
+
+FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);
+#endif
diff --git a/FLAC/private/cpu.h b/FLAC/private/cpu.h
index acd4615d8c..aa0f41443c 100644
--- a/FLAC/private/cpu.h
+++ b/FLAC/private/cpu.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,11 +45,15 @@ typedef enum {
 } FLAC__CPUInfo_Type;
 
 typedef struct {
+	FLAC__bool cpuid;
+	FLAC__bool bswap;
 	FLAC__bool cmov;
 	FLAC__bool mmx;
 	FLAC__bool fxsr;
 	FLAC__bool sse;
 	FLAC__bool sse2;
+	FLAC__bool sse3;
+	FLAC__bool ssse3;
 	FLAC__bool _3dnow;
 	FLAC__bool ext3dnow;
 	FLAC__bool extmmx;
@@ -60,16 +64,6 @@ typedef struct {
 	FLAC__bool ppc64;
 } FLAC__CPUInfo_PPC;
 
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2;
-
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW;
-extern const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX;
-
 typedef struct {
 	FLAC__bool use_asm;
 	FLAC__CPUInfo_Type type;
@@ -84,9 +78,9 @@ void FLAC__cpu_info(FLAC__CPUInfo *info);
 #ifndef FLAC__NO_ASM
 #ifdef FLAC__CPU_IA32
 #ifdef FLAC__HAS_NASM
-unsigned FLAC__cpu_info_asm_ia32();
-unsigned FLAC__cpu_info_extended_amd_asm_ia32();
-unsigned FLAC__cpu_info_sse_test_asm_ia32();
+FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void);
+void         FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx);
+FLAC__uint32 FLAC__cpu_info_extended_amd_asm_ia32(void);
 #endif
 #endif
 #endif
diff --git a/FLAC/private/crc.h b/FLAC/private/crc.h
index 083cbcf51b..414c42c60e 100644
--- a/FLAC/private/crc.h
+++ b/FLAC/private/crc.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,10 +48,14 @@ FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len);
 ** polynomial = x^16 + x^15 + x^2 + x^0
 ** init = 0
 */
-extern FLAC__uint16 FLAC__crc16_table[256];
-#define FLAC__CRC16_UPDATE(data, crc) (crc) = ((crc)<<8) ^ FLAC__crc16_table[((crc)>>8) ^ (data)];
-void FLAC__crc16_update(const FLAC__byte data, FLAC__uint16 *crc);
-void FLAC__crc16_update_block(const FLAC__byte *data, unsigned len, FLAC__uint16 *crc);
-FLAC__uint16 FLAC__crc16(const FLAC__byte *data, unsigned len);
+extern unsigned FLAC__crc16_table[256];
+
+#define FLAC__CRC16_UPDATE(data, crc) (((((crc)<<8) & 0xffff) ^ FLAC__crc16_table[((crc)>>8) ^ (data)]))
+/* this alternate may be faster on some systems/compilers */
+#if 0
+#define FLAC__CRC16_UPDATE(data, crc) ((((crc)<<8) ^ FLAC__crc16_table[((crc)>>8) ^ (data)]) & 0xffff)
+#endif
+
+unsigned FLAC__crc16(const FLAC__byte *data, unsigned len);
 
 #endif
diff --git a/FLAC/private/fixed.h b/FLAC/private/fixed.h
index de35b6d679..98383cf22d 100644
--- a/FLAC/private/fixed.h
+++ b/FLAC/private/fixed.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/private/float.h b/FLAC/private/float.h
index 67a5f266e8..73313f6dbf 100644
--- a/FLAC/private/float.h
+++ b/FLAC/private/float.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2004,2005  Josh Coalson
+ * Copyright (C) 2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/private/format.h b/FLAC/private/format.h
index 62eb8cb18f..be561d290c 100644
--- a/FLAC/private/format.h
+++ b/FLAC/private/format.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/private/lpc.h b/FLAC/private/lpc.h
index 79781add91..ff67f2dcc6 100644
--- a/FLAC/private/lpc.h
+++ b/FLAC/private/lpc.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -41,6 +41,19 @@
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
+/*
+ *	FLAC__lpc_window_data()
+ *	--------------------------------------------------------------------
+ *	Applies the given window to the data.
+ *  OPT: asm implementation
+ *
+ *	IN in[0,data_len-1]
+ *	IN window[0,data_len-1]
+ *	OUT out[0,data_len-1]
+ *	IN data_len
+ */
+void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len);
+
 /*
  *	FLAC__lpc_compute_autocorrelation()
  *	--------------------------------------------------------------------
@@ -78,13 +91,16 @@ void FLAC__lpc_compute_autocorrelation_asm_ia32_3dnow(const FLAC__real data[], u
  *	OUT lp_coeff[0,max_order-1][0,max_order-1] LP coefficients for each order
  *	*** IMPORTANT:
  *	*** lp_coeff[0,max_order-1][max_order,FLAC__MAX_LPC_ORDER-1] are untouched
- *	OUT error[0,max_order-1]                   error for each order
+ *	OUT error[0,max_order-1]                   error for each order (more
+ *	                                           specifically, the variance of
+ *	                                           the error signal times # of
+ *	                                           samples in the signal)
  *
  *	Example: if max_order is 9, the LP coefficients for order 9 will be
  *	         in lp_coeff[8][0,8], the LP coefficients for order 8 will be
  *			 in lp_coeff[7][0,7], etc.
  */
-void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[]);
+void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[]);
 
 /*
  *	FLAC__lpc_quantize_coefficients()
@@ -187,10 +203,11 @@ FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scal
  *	IN lpc_error[0,max_order-1] >= 0.0  error returned from calculating LP coefficients
  *	IN max_order > 0                    max LP order
  *	IN total_samples > 0                # of samples in residual signal
- *	IN bits_per_signal_sample           # of bits per sample in the original signal
+ *	IN overhead_bits_per_order          # of bits overhead for each increased LP order
+ *	                                    (includes warmup sample size and quantized LP coefficient)
  *	RETURN [1,max_order]                best order
  */
-unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned bits_per_signal_sample);
+unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order);
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
diff --git a/FLAC/private/md5.h b/FLAC/private/md5.h
new file mode 100644
index 0000000000..e5f675a830
--- /dev/null
+++ b/FLAC/private/md5.h
@@ -0,0 +1,44 @@
+#ifndef FLAC__PRIVATE__MD5_H
+#define FLAC__PRIVATE__MD5_H
+
+/*
+ * This is the header file for the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
+ *
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h'
+ * header definitions; now uses stuff from dpkg's config.h
+ *  - Ian Jackson <ijackson@nyx.cs.du.edu>.
+ * Still in the public domain.
+ *
+ * Josh Coalson: made some changes to integrate with libFLAC.
+ * Still in the public domain, with no warranty.
+ */
+
+#include "FLAC/ordinals.h"
+
+typedef struct { /* MD5 working state; passed by pointer to FLAC__MD5Init(), FLAC__MD5Accumulate() and FLAC__MD5Final() */
+	FLAC__uint32 in[16]; /* 16 words; presumably the input block currently being filled -- NOTE(review): confirm in md5.c */
+	FLAC__uint32 buf[4]; /* 4 words; presumably the running digest state -- NOTE(review): confirm in md5.c */
+	FLAC__uint32 bytes[2]; /* presumably a two-word count of bytes hashed so far -- NOTE(review): confirm in md5.c */
+	FLAC__byte *internal_buf; /* presumably a scratch buffer used by FLAC__MD5Accumulate() -- NOTE(review): confirm who allocates and frees it */
+	size_t capacity; /* presumably the allocated size of internal_buf -- NOTE(review): confirm units in md5.c */
+} FLAC__MD5Context;
+
+void FLAC__MD5Init(FLAC__MD5Context *context);
+void FLAC__MD5Final(FLAC__byte digest[16], FLAC__MD5Context *context);
+
+FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample);
+
+#endif
diff --git a/FLAC/private/memory.h b/FLAC/private/memory.h
index c985a72fbd..d39e48d952 100644
--- a/FLAC/private/memory.h
+++ b/FLAC/private/memory.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
diff --git a/FLAC/protected/stream_decoder.h b/FLAC/protected/stream_decoder.h
index 8cc1e6bdf4..42f542b37e 100644
--- a/FLAC/protected/stream_decoder.h
+++ b/FLAC/protected/stream_decoder.h
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -33,6 +33,9 @@
 #define FLAC__PROTECTED__STREAM_DECODER_H
 
 #include "FLAC/stream_decoder.h"
+#if FLAC__HAS_OGG
+#include "private/ogg_decoder_aspect.h"
+#endif
 
 typedef struct FLAC__StreamDecoderProtected {
 	FLAC__StreamDecoderState state;
@@ -41,6 +44,10 @@ typedef struct FLAC__StreamDecoderProtected {
 	unsigned bits_per_sample;
 	unsigned sample_rate; /* in Hz */
 	unsigned blocksize; /* in samples (per channel) */
+	FLAC__bool md5_checking; /* if true, generate MD5 signature of decoded data and compare against signature in the STREAMINFO metadata block */
+#if FLAC__HAS_OGG
+	FLAC__OggDecoderAspect ogg_decoder_aspect;
+#endif
 } FLAC__StreamDecoderProtected;
 
 /*
diff --git a/FLAC/share/alloc.h b/FLAC/share/alloc.h
new file mode 100644
index 0000000000..812aa69d0b
--- /dev/null
+++ b/FLAC/share/alloc.h
@@ -0,0 +1,212 @@
+/* alloc - Convenience routines for safely allocating memory
+ * Copyright (C) 2007  Josh Coalson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef FLAC__SHARE__ALLOC_H
+#define FLAC__SHARE__ALLOC_H
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+/* WATCHOUT: for C++ you may have to #define __STDC_LIMIT_MACROS 1 very early,
+ * before #including this file; otherwise SIZE_MAX might not be defined
+ */
+
+#include <limits.h> /* for SIZE_MAX */
+#if !defined _MSC_VER && !defined __MINGW32__ && !defined __EMX__
+#include <stdint.h> /* for SIZE_MAX in case limits.h didn't get it */
+#endif
+#include <stdlib.h> /* for size_t, malloc(), etc */
+
+#ifndef SIZE_MAX
+# ifndef SIZE_T_MAX
+#  ifdef _MSC_VER
+#   define SIZE_T_MAX UINT_MAX
+#  else
+#   error
+#  endif
+# endif
+# define SIZE_MAX SIZE_T_MAX
+#endif
+
+#ifndef FLaC__INLINE
+#define FLaC__INLINE
+#endif
+
+/* avoid malloc()ing 0 bytes, see:
+ * https://www.securecoding.cert.org/confluence/display/seccode/MEM04-A.+Do+not+make+assumptions+about+the+result+of+allocating+0+bytes?focusedCommentId=5407003
+*/
+static FLaC__INLINE void *safe_malloc_(size_t size) /* malloc() wrapper: a request for 0 bytes is turned into a request for 1 byte */
+{
+	/* malloc(0) is implementation-defined (it may return NULL or a unique pointer); FLAC src convention is to always allocate */
+	if(!size)
+		size++;
+	return malloc(size); /* result is whatever malloc() returns for the (non-zero) size */
+}
+
+static FLaC__INLINE void *safe_calloc_(size_t nmemb, size_t size) /* calloc() wrapper that never requests 0 bytes */
+{
+	if(!nmemb || !size)
+		return malloc(1); /* a 0-byte request is implementation-defined; FLAC src convention is to always allocate. NOTE: this 1-byte block comes from malloc(), so it is not zero-filled */
+	return calloc(nmemb, size);
+}
+
+/*@@@@ there's probably a better way to prevent overflows when allocating untrusted sums but this works for now */
+
+static FLaC__INLINE void *safe_malloc_add_2op_(size_t size1, size_t size2) /* allocates size1+size2 bytes; returns 0 if the sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds the sum */
+	if(size2 < size1) /* an unsigned sum smaller than one of its operands means it wrapped around */
+		return 0;
+	return safe_malloc_(size2); /* safe_malloc_() also covers the case where the sum is 0 */
+}
+
+static FLaC__INLINE void *safe_malloc_add_3op_(size_t size1, size_t size2, size_t size3) /* allocates size1+size2+size3 bytes; returns 0 if any partial sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds size1+size2 */
+	if(size2 < size1) /* wrapped around */
+		return 0;
+	size3 += size2; /* size3 now holds the full sum */
+	if(size3 < size2) /* wrapped around */
+		return 0;
+	return safe_malloc_(size3);
+}
+
+static FLaC__INLINE void *safe_malloc_add_4op_(size_t size1, size_t size2, size_t size3, size_t size4) /* allocates size1+size2+size3+size4 bytes; returns 0 if any partial sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds size1+size2 */
+	if(size2 < size1) /* wrapped around */
+		return 0;
+	size3 += size2; /* size3 now holds size1+size2+size3 */
+	if(size3 < size2) /* wrapped around */
+		return 0;
+	size4 += size3; /* size4 now holds the full sum */
+	if(size4 < size3) /* wrapped around */
+		return 0;
+	return safe_malloc_(size4);
+}
+
+static FLaC__INLINE void *safe_malloc_mul_2op_(size_t size1, size_t size2) /* allocates size1*size2 bytes; returns 0 if the product overflows size_t */
+#if 0
+needs support for cases where sizeof(size_t) != 4
+{
+	/* could be faster #ifdef'ing off SIZEOF_SIZE_T */
+	if(sizeof(size_t) == 4) {
+		if ((double)size1 * (double)size2 < 4294967296.0)
+			return malloc(size1*size2);
+	}
+	return 0;
+}
+#else
+/* active implementation: checks for overflow by division, so it does not depend on sizeof(size_t) */
+{
+	if(!size1 || !size2)
+		return malloc(1); /* malloc(0) is implementation-defined; FLAC src convention is to always allocate (this also keeps the division below from dividing by 0) */
+	if(size1 > SIZE_MAX / size2) /* size1*size2 would exceed SIZE_MAX */
+		return 0;
+	return malloc(size1*size2);
+}
+#endif
+
+static FLaC__INLINE void *safe_malloc_mul_3op_(size_t size1, size_t size2, size_t size3) /* allocates size1*size2*size3 bytes; returns 0 if the product overflows size_t */
+{
+	if(!size1 || !size2 || !size3)
+		return malloc(1); /* malloc(0) is implementation-defined; FLAC src convention is to always allocate (this also keeps the divisions below from dividing by 0) */
+	if(size1 > SIZE_MAX / size2) /* size1*size2 would exceed SIZE_MAX */
+		return 0;
+	size1 *= size2; /* size1 now holds size1*size2 */
+	if(size1 > SIZE_MAX / size3) /* multiplying by size3 would exceed SIZE_MAX */
+		return 0;
+	return malloc(size1*size3);
+}
+
+/* allocates size1*size2 + size3 bytes; returns 0 if the product or the sum overflows size_t */
+static FLaC__INLINE void *safe_malloc_mul2add_(size_t size1, size_t size2, size_t size3)
+{
+	if(!size1 || !size2) /* the product is 0, so only size3 bytes are needed (also keeps the division below from dividing by 0) */
+		return safe_malloc_(size3);
+	if(size1 > SIZE_MAX / size2) /* size1*size2 would exceed SIZE_MAX */
+		return 0;
+	return safe_malloc_add_2op_(size1*size2, size3); /* the add helper checks the final sum for overflow */
+}
+
+/* allocates size1 * (size2 + size3) bytes; returns 0 if the sum or the product overflows size_t */
+static FLaC__INLINE void *safe_malloc_muladd2_(size_t size1, size_t size2, size_t size3)
+{
+	if(!size1 || (!size2 && !size3))
+		return malloc(1); /* malloc(0) is implementation-defined; FLAC src convention is to always allocate */
+	size2 += size3; /* size2 now holds size2+size3 */
+	if(size2 < size3) /* wrapped around */
+		return 0;
+	return safe_malloc_mul_2op_(size1, size2); /* the mul helper checks the product for overflow */
+}
+
+static FLaC__INLINE void *safe_realloc_add_2op_(void *ptr, size_t size1, size_t size2) /* resizes ptr to size1+size2 bytes; returns 0 without calling realloc() if the sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds the sum */
+	if(size2 < size1) /* wrapped around; ptr is not passed to realloc() on this path */
+		return 0;
+	return realloc(ptr, size2); /* NOTE: unlike safe_malloc_(), a sum of 0 is passed to realloc() as-is */
+}
+
+static FLaC__INLINE void *safe_realloc_add_3op_(void *ptr, size_t size1, size_t size2, size_t size3) /* resizes ptr to size1+size2+size3 bytes; returns 0 without calling realloc() if any partial sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds size1+size2 */
+	if(size2 < size1) /* wrapped around */
+		return 0;
+	size3 += size2; /* size3 now holds the full sum */
+	if(size3 < size2) /* wrapped around */
+		return 0;
+	return realloc(ptr, size3); /* NOTE: a sum of 0 is passed to realloc() as-is */
+}
+
+static FLaC__INLINE void *safe_realloc_add_4op_(void *ptr, size_t size1, size_t size2, size_t size3, size_t size4) /* resizes ptr to size1+size2+size3+size4 bytes; returns 0 without calling realloc() if any partial sum overflows size_t */
+{
+	size2 += size1; /* size2 now holds size1+size2 */
+	if(size2 < size1) /* wrapped around */
+		return 0;
+	size3 += size2; /* size3 now holds size1+size2+size3 */
+	if(size3 < size2) /* wrapped around */
+		return 0;
+	size4 += size3; /* size4 now holds the full sum */
+	if(size4 < size3) /* wrapped around */
+		return 0;
+	return realloc(ptr, size4); /* NOTE: a sum of 0 is passed to realloc() as-is */
+}
+
+static FLaC__INLINE void *safe_realloc_mul_2op_(void *ptr, size_t size1, size_t size2) /* resizes ptr to size1*size2 bytes; returns 0 without calling realloc() if the product overflows size_t */
+{
+	if(!size1 || !size2) /* the product is 0 (this check also keeps the division below from dividing by 0) */
+		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+	if(size1 > SIZE_MAX / size2) /* size1*size2 would exceed SIZE_MAX */
+		return 0;
+	return realloc(ptr, size1*size2);
+}
+
+/* resizes ptr to size1 * (size2 + size3) bytes; returns 0 without calling realloc() if the sum or the product overflows size_t */
+static FLaC__INLINE void *safe_realloc_muladd2_(void *ptr, size_t size1, size_t size2, size_t size3)
+{
+	if(!size1 || (!size2 && !size3)) /* the requested size is 0 */
+		return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+	size2 += size3; /* size2 now holds size2+size3 */
+	if(size2 < size3) /* wrapped around */
+		return 0;
+	return safe_realloc_mul_2op_(ptr, size1, size2); /* the mul helper checks the product for overflow */
+}
+
+#endif
diff --git a/FLAC/stream_decoder.c b/FLAC/stream_decoder.c
index 25fdfb00b2..1751e50f86 100644
--- a/FLAC/stream_decoder.c
+++ b/FLAC/stream_decoder.c
@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000,2001,2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,24 +29,43 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if defined _MSC_VER || defined __MINGW32__
+#define _CRT_SECURE_NO_WARNINGS
+#include <io.h> /* for _setmode() */
+#include <fcntl.h> /* for _O_BINARY */
+#endif
+#if defined __CYGWIN__ || defined __EMX__
+#include <io.h> /* for setmode(), O_BINARY */
+#include <fcntl.h> /* for _O_BINARY */
+#endif
 #include <stdio.h>
 #include <stdlib.h> /* for malloc() */
 #include <string.h> /* for memset/memcpy() */
+#include <sys/stat.h> /* for stat() */
+#include <sys/types.h> /* for off_t */
+#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
+#if _MSC_VER <= 1600 || defined __BORLANDC__ /* @@@ [2G limit] */
+#define fseeko fseek
+#define ftello ftell
+#endif
+#endif
 #include "FLAC/assert.h"
+#include "share/alloc.h"
 #include "protected/stream_decoder.h"
-#include "private/bitbuffer.h"
+#include "private/bitreader.h"
 #include "private/bitmath.h"
 #include "private/cpu.h"
 #include "private/crc.h"
 #include "private/fixed.h"
 #include "private/format.h"
 #include "private/lpc.h"
+#include "private/md5.h"
 #include "private/memory.h"
 
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
 #ifdef max
 #undef max
 #endif
@@ -59,6 +78,17 @@
 #define FLAC__U64L(x) x##LLU
 #endif
 
+
+/* technically this should be in an "export.c" but this is convenient enough */
+FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC =
+#if FLAC__HAS_OGG
+	1
+#else
+	0
+#endif
+;
+
+
 /***********************************************************************
  *
  * Private static data
@@ -74,6 +104,7 @@ static FLAC__byte ID3V2_TAG_[3] = { 'I', 'D', '3' };
  ***********************************************************************/
 
 static void set_defaults_(FLAC__StreamDecoder *decoder);
+static FILE *get_binary_stdin_(void);
 static FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigned channels);
 static FLAC__bool has_id_filtered_(FLAC__StreamDecoder *decoder, FLAC__byte *id);
 static FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder);
@@ -82,6 +113,7 @@ static FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__
 static FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length);
 static FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj);
 static FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_CueSheet *obj);
+static FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_Picture *obj);
 static FLAC__bool skip_id3v2_tag_(FLAC__StreamDecoder *decoder);
 static FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FLAC__bool do_full_decode);
@@ -91,9 +123,24 @@ static FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, unsigned
 static FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode);
 static FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode);
 static FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode);
-static FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual);
+static FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended);
 static FLAC__bool read_zero_padding_(FLAC__StreamDecoder *decoder);
-static FLAC__bool read_callback_(FLAC__byte buffer[], unsigned *bytes, void *client_data);
+static FLAC__bool read_callback_(FLAC__byte buffer[], size_t *bytes, void *client_data);
+#if FLAC__HAS_OGG
+static FLAC__StreamDecoderReadStatus read_callback_ogg_aspect_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes);
+static FLAC__OggDecoderAspectReadStatus read_callback_proxy_(const void *void_decoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
+#endif
+static FLAC__StreamDecoderWriteStatus write_audio_frame_to_client_(FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[]);
+static void send_error_to_client_(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status);
+static FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 stream_length, FLAC__uint64 target_sample);
+#if FLAC__HAS_OGG
+static FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint64 stream_length, FLAC__uint64 target_sample);
+#endif
+static FLAC__StreamDecoderReadStatus file_read_callback_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
+static FLAC__StreamDecoderSeekStatus file_seek_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data);
+static FLAC__StreamDecoderTellStatus file_tell_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data);
+static FLAC__StreamDecoderLengthStatus file_length_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data);
+static FLAC__bool file_eof_callback_(const FLAC__StreamDecoder *decoder, void *client_data);
 
 /***********************************************************************
  *
@@ -102,7 +149,14 @@ static FLAC__bool read_callback_(FLAC__byte buffer[], unsigned *bytes, void *cli
  ***********************************************************************/
 
 typedef struct FLAC__StreamDecoderPrivate {
+#if FLAC__HAS_OGG
+	FLAC__bool is_ogg;
+#endif
 	FLAC__StreamDecoderReadCallback read_callback;
+	FLAC__StreamDecoderSeekCallback seek_callback;
+	FLAC__StreamDecoderTellCallback tell_callback;
+	FLAC__StreamDecoderLengthCallback length_callback;
+	FLAC__StreamDecoderEofCallback eof_callback;
 	FLAC__StreamDecoderWriteCallback write_callback;
 	FLAC__StreamDecoderMetadataCallback metadata_callback;
 	FLAC__StreamDecoderErrorCallback error_callback;
@@ -114,21 +168,22 @@ typedef struct FLAC__StreamDecoderPrivate {
 	void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
 	/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit), AND order <= 8: */
 	void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
+	FLAC__bool (*local_bitreader_read_rice_signed_block)(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
 	void *client_data;
-	FLAC__BitBuffer *input;
+	FILE *file; /* only used if FLAC__stream_decoder_init_FILE()/FLAC__stream_decoder_init_file() called, else NULL */
+	FLAC__BitReader *input;
 	FLAC__int32 *output[FLAC__MAX_CHANNELS];
 	FLAC__int32 *residual[FLAC__MAX_CHANNELS]; /* WATCHOUT: these are the aligned pointers; the real pointers that should be free()'d are residual_unaligned[] below */
 	FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents[FLAC__MAX_CHANNELS];
 	unsigned output_capacity, output_channels;
-	FLAC__uint32 last_frame_number;
-	FLAC__uint32 last_block_size;
+	FLAC__uint32 fixed_block_size, next_fixed_block_size;
 	FLAC__uint64 samples_decoded;
 	FLAC__bool has_stream_info, has_seek_table;
 	FLAC__StreamMetadata stream_info;
 	FLAC__StreamMetadata seek_table;
 	FLAC__bool metadata_filter[128]; /* MAGIC number 128 == total number of metadata block types == 1 << 7 */
 	FLAC__byte *metadata_filter_ids;
-	unsigned metadata_filter_ids_count, metadata_filter_ids_capacity; /* units for both are IDs, not bytes */
+	size_t metadata_filter_ids_count, metadata_filter_ids_capacity; /* units for both are IDs, not bytes */
 	FLAC__Frame frame;
 	FLAC__bool cached; /* true if there is a byte in lookahead */
 	FLAC__CPUInfo cpuinfo;
@@ -136,6 +191,19 @@ typedef struct FLAC__StreamDecoderPrivate {
 	FLAC__byte lookahead; /* temp storage when we need to look ahead one byte in the stream */
 	/* unaligned (original) pointers to allocated data */
 	FLAC__int32 *residual_unaligned[FLAC__MAX_CHANNELS];
+	FLAC__bool do_md5_checking; /* initially gets protected_->md5_checking but is turned off after a seek or if the metadata has a zero MD5 */
+	FLAC__bool internal_reset_hack; /* used only during init() so we can call reset to set up the decoder without rewinding the input */
+	FLAC__bool is_seeking;
+	FLAC__MD5Context md5context;
+	FLAC__byte computed_md5sum[16]; /* this is the sum we computed from the decoded data */
+	/* (the rest of these are only used for seeking) */
+	FLAC__Frame last_frame; /* holds the info of the last frame we seeked to */
+	FLAC__uint64 first_frame_offset; /* hint to the seek routine of where in the stream the first audio frame starts */
+	FLAC__uint64 target_sample;
+	unsigned unparseable_frame_count; /* used to tell whether we're decoding a future version of FLAC or just got a bad sync */
+#if FLAC__HAS_OGG
+	FLAC__bool got_a_frame; /* hack needed in Ogg FLAC seek routine to check when process_single() actually writes a frame */
+#endif
 } FLAC__StreamDecoderPrivate;
 
 /***********************************************************************
@@ -150,20 +218,46 @@ FLAC_API const char * const FLAC__StreamDecoderStateString[] = {
 	"FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC",
 	"FLAC__STREAM_DECODER_READ_FRAME",
 	"FLAC__STREAM_DECODER_END_OF_STREAM",
+	"FLAC__STREAM_DECODER_OGG_ERROR",
+	"FLAC__STREAM_DECODER_SEEK_ERROR",
 	"FLAC__STREAM_DECODER_ABORTED",
-	"FLAC__STREAM_DECODER_UNPARSEABLE_STREAM",
 	"FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR",
-	"FLAC__STREAM_DECODER_ALREADY_INITIALIZED",
-	"FLAC__STREAM_DECODER_INVALID_CALLBACK",
 	"FLAC__STREAM_DECODER_UNINITIALIZED"
 };
 
+FLAC_API const char * const FLAC__StreamDecoderInitStatusString[] = {
+	"FLAC__STREAM_DECODER_INIT_STATUS_OK",
+	"FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER",
+	"FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS",
+	"FLAC__STREAM_DECODER_INIT_STATUS_MEMORY_ALLOCATION_ERROR",
+	"FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE",
+	"FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED"
+};
+
 FLAC_API const char * const FLAC__StreamDecoderReadStatusString[] = {
 	"FLAC__STREAM_DECODER_READ_STATUS_CONTINUE",
 	"FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM",
 	"FLAC__STREAM_DECODER_READ_STATUS_ABORT"
 };
 
+FLAC_API const char * const FLAC__StreamDecoderSeekStatusString[] = {
+	"FLAC__STREAM_DECODER_SEEK_STATUS_OK",
+	"FLAC__STREAM_DECODER_SEEK_STATUS_ERROR",
+	"FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED"
+};
+
+FLAC_API const char * const FLAC__StreamDecoderTellStatusString[] = {
+	"FLAC__STREAM_DECODER_TELL_STATUS_OK",
+	"FLAC__STREAM_DECODER_TELL_STATUS_ERROR",
+	"FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED"
+};
+
+FLAC_API const char * const FLAC__StreamDecoderLengthStatusString[] = {
+	"FLAC__STREAM_DECODER_LENGTH_STATUS_OK",
+	"FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR",
+	"FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED"
+};
+
 FLAC_API const char * const FLAC__StreamDecoderWriteStatusString[] = {
 	"FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE",
 	"FLAC__STREAM_DECODER_WRITE_STATUS_ABORT"
@@ -172,7 +266,8 @@ FLAC_API const char * const FLAC__StreamDecoderWriteStatusString[] = {
 FLAC_API const char * const FLAC__StreamDecoderErrorStatusString[] = {
 	"FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC",
 	"FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER",
-	"FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH"
+	"FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH",
+	"FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM"
 };
 
 /***********************************************************************
@@ -180,7 +275,7 @@ FLAC_API const char * const FLAC__StreamDecoderErrorStatusString[] = {
  * Class constructor/destructor
  *
  ***********************************************************************/
-FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new()
+FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new(void)
 {
 	FLAC__StreamDecoder *decoder;
 	unsigned i;
@@ -205,7 +300,7 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new()
 		return 0;
 	}
 
-	decoder->private_->input = FLAC__bitbuffer_new();
+	decoder->private_->input = FLAC__bitreader_new();
 	if(decoder->private_->input == 0) {
 		free(decoder->private_);
 		free(decoder->protected_);
@@ -215,7 +310,7 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new()
 
 	decoder->private_->metadata_filter_ids_capacity = 16;
 	if(0 == (decoder->private_->metadata_filter_ids = (FLAC__byte*)malloc((FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8) * decoder->private_->metadata_filter_ids_capacity))) {
-		FLAC__bitbuffer_delete(decoder->private_->input);
+		FLAC__bitreader_delete(decoder->private_->input);
 		free(decoder->private_);
 		free(decoder->protected_);
 		free(decoder);
@@ -234,6 +329,8 @@ FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new()
 	for(i = 0; i < FLAC__MAX_CHANNELS; i++)
 		FLAC__format_entropy_coding_method_partitioned_rice_contents_init(&decoder->private_->partitioned_rice_contents[i]);
 
+	decoder->private_->file = 0;
+
 	set_defaults_(decoder);
 
 	decoder->protected_->state = FLAC__STREAM_DECODER_UNINITIALIZED;
@@ -250,12 +347,12 @@ FLAC_API void FLAC__stream_decoder_delete(FLAC__StreamDecoder *decoder)
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->private_->input);
 
-	FLAC__stream_decoder_finish(decoder);
+	(void)FLAC__stream_decoder_finish(decoder);
 
 	if(0 != decoder->private_->metadata_filter_ids)
 		free(decoder->private_->metadata_filter_ids);
 
-	FLAC__bitbuffer_delete(decoder->private_->input);
+	FLAC__bitreader_delete(decoder->private_->input);
 
 	for(i = 0; i < FLAC__MAX_CHANNELS; i++)
 		FLAC__format_entropy_coding_method_partitioned_rice_contents_clear(&decoder->private_->partitioned_rice_contents[i]);
@@ -271,24 +368,43 @@ FLAC_API void FLAC__stream_decoder_delete(FLAC__StreamDecoder *decoder)
  *
  ***********************************************************************/
 
-FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder *decoder)
+static FLAC__StreamDecoderInitStatus init_stream_internal_(
+	FLAC__StreamDecoder *decoder,
+	FLAC__StreamDecoderReadCallback read_callback,
+	FLAC__StreamDecoderSeekCallback seek_callback,
+	FLAC__StreamDecoderTellCallback tell_callback,
+	FLAC__StreamDecoderLengthCallback length_callback,
+	FLAC__StreamDecoderEofCallback eof_callback,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data,
+	FLAC__bool is_ogg
+)
 {
 	FLAC__ASSERT(0 != decoder);
 
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_ALREADY_INITIALIZED;
+		return FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
 
-	if(0 == decoder->private_->read_callback || 0 == decoder->private_->write_callback || 0 == decoder->private_->metadata_callback || 0 == decoder->private_->error_callback)
-		return decoder->protected_->state = FLAC__STREAM_DECODER_INVALID_CALLBACK;
+#if !FLAC__HAS_OGG
+	if(is_ogg)
+		return FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER;
+#endif
 
-	if(!FLAC__bitbuffer_init(decoder->private_->input))
-		return decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+	if(
+		0 == read_callback ||
+		0 == write_callback ||
+		0 == error_callback ||
+		(seek_callback && (0 == tell_callback || 0 == length_callback || 0 == eof_callback))
+	)
+		return FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS;
 
-	decoder->private_->last_frame_number = 0;
-	decoder->private_->last_block_size = 0;
-	decoder->private_->samples_decoded = 0;
-	decoder->private_->has_stream_info = false;
-	decoder->private_->cached = false;
+#if FLAC__HAS_OGG
+	decoder->private_->is_ogg = is_ogg;
+	if(is_ogg && !FLAC__ogg_decoder_aspect_init(&decoder->protected_->ogg_decoder_aspect))
+		return decoder->protected_->state = FLAC__STREAM_DECODER_OGG_ERROR;
+#endif
 
 	/*
 	 * get the CPU info and set the function pointers
@@ -299,12 +415,17 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
 	decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
 	decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
 	decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal;
+	decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block;
 	/* now override with asm where appropriate */
 #ifndef FLAC__NO_ASM
 	if(decoder->private_->cpuinfo.use_asm) {
 #ifdef FLAC__CPU_IA32
 		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
 #ifdef FLAC__HAS_NASM
+#if 1 /*@@@@@@ OPT: not clearly faster, needs more testing */
+		if(decoder->private_->cpuinfo.data.ia32.bswap)
+			decoder->private_->local_bitreader_read_rice_signed_block = FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap;
+#endif
 		if(decoder->private_->cpuinfo.data.ia32.mmx) {
 			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
 			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
@@ -326,24 +447,243 @@ FLAC_API FLAC__StreamDecoderState FLAC__stream_decoder_init(FLAC__StreamDecoder
 	}
 #endif
 
-	if(!FLAC__stream_decoder_reset(decoder))
-		return decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+	/* from here on, errors are fatal */
 
-	return decoder->protected_->state;
+	if(!FLAC__bitreader_init(decoder->private_->input, decoder->private_->cpuinfo, read_callback_, decoder)) {
+		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		return FLAC__STREAM_DECODER_INIT_STATUS_MEMORY_ALLOCATION_ERROR;
+	}
+
+	decoder->private_->read_callback = read_callback;
+	decoder->private_->seek_callback = seek_callback;
+	decoder->private_->tell_callback = tell_callback;
+	decoder->private_->length_callback = length_callback;
+	decoder->private_->eof_callback = eof_callback;
+	decoder->private_->write_callback = write_callback;
+	decoder->private_->metadata_callback = metadata_callback;
+	decoder->private_->error_callback = error_callback;
+	decoder->private_->client_data = client_data;
+	decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size = 0;
+	decoder->private_->samples_decoded = 0;
+	decoder->private_->has_stream_info = false;
+	decoder->private_->cached = false;
+
+	decoder->private_->do_md5_checking = decoder->protected_->md5_checking;
+	decoder->private_->is_seeking = false;
+
+	decoder->private_->internal_reset_hack = true; /* so the following reset does not try to rewind the input */
+	if(!FLAC__stream_decoder_reset(decoder)) {
+		/* above call sets the state for us */
+		return FLAC__STREAM_DECODER_INIT_STATUS_MEMORY_ALLOCATION_ERROR;
+	}
+
+	return FLAC__STREAM_DECODER_INIT_STATUS_OK;
 }
 
-FLAC_API void FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder)
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_stream(
+	FLAC__StreamDecoder *decoder,
+	FLAC__StreamDecoderReadCallback read_callback, /* required: init_stream_internal_() rejects 0 */
+	FLAC__StreamDecoderSeekCallback seek_callback, /* optional; if non-0 then tell/length/eof must be non-0 too */
+	FLAC__StreamDecoderTellCallback tell_callback,
+	FLAC__StreamDecoderLengthCallback length_callback,
+	FLAC__StreamDecoderEofCallback eof_callback,
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback, /* not checked at init time */
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* native (non-Ogg) entry point: forwards every argument to init_stream_internal_() with is_ogg=false */
+	return init_stream_internal_(
+		decoder,
+		read_callback,
+		seek_callback,
+		tell_callback,
+		length_callback,
+		eof_callback,
+		write_callback,
+		metadata_callback,
+		error_callback,
+		client_data,
+		/*is_ogg=*/false
+	);
+}
+
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_stream(
+	FLAC__StreamDecoder *decoder,
+	FLAC__StreamDecoderReadCallback read_callback, /* required: init_stream_internal_() rejects 0 */
+	FLAC__StreamDecoderSeekCallback seek_callback, /* optional; if non-0 then tell/length/eof must be non-0 too */
+	FLAC__StreamDecoderTellCallback tell_callback,
+	FLAC__StreamDecoderLengthCallback length_callback,
+	FLAC__StreamDecoderEofCallback eof_callback,
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback, /* not checked at init time */
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* Ogg entry point: is_ogg=true; init_stream_internal_() returns UNSUPPORTED_CONTAINER when built with FLAC__HAS_OGG false */
+	return init_stream_internal_(
+		decoder,
+		read_callback,
+		seek_callback,
+		tell_callback,
+		length_callback,
+		eof_callback,
+		write_callback,
+		metadata_callback,
+		error_callback,
+		client_data,
+		/*is_ogg=*/true
+	);
+}
+
+static FLAC__StreamDecoderInitStatus init_FILE_internal_( /* shared back end of the init_FILE()/init_ogg_FILE() entry points and of init_file_internal_() */
+	FLAC__StreamDecoder *decoder,
+	FILE *file,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data,
+	FLAC__bool is_ogg
+)
 {
-	unsigned i;
 	FLAC__ASSERT(0 != decoder);
+	FLAC__ASSERT(0 != file);
+
+	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
+		return FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED; /* an init status must never be stored in ->state (different enum); leave the live decoder's state alone */
+
+	if(0 == write_callback || 0 == error_callback)
+		return FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS; /* state stays UNINITIALIZED so a corrected init call can still succeed */
+
+	/*
+	 * To make sure that our file does not go unclosed after an error, we
+	 * must assign the FILE pointer before any further error can occur in
+	 * this routine.
+	 */
+	if(file == stdin)
+		file = get_binary_stdin_(); /* just to be safe */
+
+	decoder->private_->file = file; /* FLAC__stream_decoder_finish() fclose()s this (unless stdin) once the decoder has left the UNINITIALIZED state */
+
+	return init_stream_internal_(
+		decoder,
+		file_read_callback_,
+		decoder->private_->file == stdin? 0: file_seek_callback_,
+		decoder->private_->file == stdin? 0: file_tell_callback_,
+		decoder->private_->file == stdin? 0: file_length_callback_,
+		file_eof_callback_,
+		write_callback,
+		metadata_callback,
+		error_callback,
+		client_data,
+		is_ogg
+	);
+}
+
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_FILE(
+	FLAC__StreamDecoder *decoder,
+	FILE *file, /* must be non-NULL (asserted in init_FILE_internal_()); may be stdin */
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* native (non-Ogg) variant: delegates to init_FILE_internal_() with is_ogg=false */
+	return init_FILE_internal_(decoder, file, write_callback, metadata_callback, error_callback, client_data, /*is_ogg=*/false);
+}
+
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_FILE(
+	FLAC__StreamDecoder *decoder,
+	FILE *file, /* must be non-NULL (asserted in init_FILE_internal_()); may be stdin */
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* Ogg variant: delegates to init_FILE_internal_() with is_ogg=true */
+	return init_FILE_internal_(decoder, file, write_callback, metadata_callback, error_callback, client_data, /*is_ogg=*/true);
+}
+
+static FLAC__StreamDecoderInitStatus init_file_internal_( /* opens 'filename' (or uses stdin) and hands the FILE* to init_FILE_internal_() */
+	FLAC__StreamDecoder *decoder,
+	const char *filename,
+	FLAC__StreamDecoderWriteCallback write_callback,
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback,
+	void *client_data,
+	FLAC__bool is_ogg
+)
+{
+	FILE *file;
+
+	FLAC__ASSERT(0 != decoder);
+
+	/*
+	 * To make sure that our file does not go unclosed after an error, we
+	 * have to do the same entrance checks here that are later performed
+	 * in init_FILE_internal_() before the FILE* is assigned.
+	 */
+	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
+		return FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED; /* an init status must never be stored in ->state (different enum); leave the live decoder's state alone */
+
+	if(0 == write_callback || 0 == error_callback)
+		return FLAC__STREAM_DECODER_INIT_STATUS_INVALID_CALLBACKS; /* state stays UNINITIALIZED so a corrected init call can still succeed */
+
+	file = filename? fopen(filename, "rb") : stdin; /* a 0 filename selects stdin */
+
+	if(0 == file)
+		return FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE;
+
+	return init_FILE_internal_(decoder, file, write_callback, metadata_callback, error_callback, client_data, is_ogg);
+}
+
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_file(
+	FLAC__StreamDecoder *decoder,
+	const char *filename, /* 0 selects stdin; otherwise opened with fopen(filename, "rb") in init_file_internal_() */
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* native (non-Ogg) variant: delegates to init_file_internal_() with is_ogg=false */
+	return init_file_internal_(decoder, filename, write_callback, metadata_callback, error_callback, client_data, /*is_ogg=*/false);
+}
+
+FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_file(
+	FLAC__StreamDecoder *decoder,
+	const char *filename, /* 0 selects stdin; otherwise opened with fopen(filename, "rb") in init_file_internal_() */
+	FLAC__StreamDecoderWriteCallback write_callback, /* required */
+	FLAC__StreamDecoderMetadataCallback metadata_callback,
+	FLAC__StreamDecoderErrorCallback error_callback, /* required */
+	void *client_data
+)
+{ /* Ogg variant: delegates to init_file_internal_() with is_ogg=true */
+	return init_file_internal_(decoder, filename, write_callback, metadata_callback, error_callback, client_data, /*is_ogg=*/true);
+}
+
+FLAC_API FLAC__bool FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder)
+{
+	FLAC__bool md5_failed = false;
+	unsigned i;
+
+	FLAC__ASSERT(0 != decoder);
+	FLAC__ASSERT(0 != decoder->private_);
+	FLAC__ASSERT(0 != decoder->protected_);
+
 	if(decoder->protected_->state == FLAC__STREAM_DECODER_UNINITIALIZED)
-		return;
-	if(0 != decoder->private_->seek_table.data.seek_table.points) {
+		return true;
+
+	/* see the comment in FLAC__stream_decoder_reset() as to why we
+	 * always call FLAC__MD5Final()
+	 */
+	FLAC__MD5Final(decoder->private_->computed_md5sum, &decoder->private_->md5context);
+
+	if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) {
 		free(decoder->private_->seek_table.data.seek_table.points);
 		decoder->private_->seek_table.data.seek_table.points = 0;
 		decoder->private_->has_seek_table = false;
 	}
-	FLAC__bitbuffer_free(decoder->private_->input);
+	FLAC__bitreader_free(decoder->private_->input);
 	for(i = 0; i < FLAC__MAX_CHANNELS; i++) {
 		/* WATCHOUT:
 		 * FLAC__lpc_restore_signal_asm_ia32_mmx() requires that the
@@ -363,63 +703,54 @@ FLAC_API void FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder)
 	decoder->private_->output_capacity = 0;
 	decoder->private_->output_channels = 0;
 
+#if FLAC__HAS_OGG
+	if(decoder->private_->is_ogg)
+		FLAC__ogg_decoder_aspect_finish(&decoder->protected_->ogg_decoder_aspect);
+#endif
+
+	if(0 != decoder->private_->file) {
+		if(decoder->private_->file != stdin)
+			fclose(decoder->private_->file);
+		decoder->private_->file = 0;
+	}
+
+	if(decoder->private_->do_md5_checking) {
+		if(memcmp(decoder->private_->stream_info.data.stream_info.md5sum, decoder->private_->computed_md5sum, 16))
+			md5_failed = true;
+	}
+	decoder->private_->is_seeking = false;
+
 	set_defaults_(decoder);
 
 	decoder->protected_->state = FLAC__STREAM_DECODER_UNINITIALIZED;
+
+	return !md5_failed;
 }
 
-FLAC_API FLAC__bool FLAC__stream_decoder_set_read_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderReadCallback value)
+FLAC_API FLAC__bool FLAC__stream_decoder_set_ogg_serial_number(FLAC__StreamDecoder *decoder, long value)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
 		return false;
-	decoder->private_->read_callback = value;
+#if FLAC__HAS_OGG
+	/* can't check decoder->private_->is_ogg since that's not set until init time */
+	FLAC__ogg_decoder_aspect_set_serial_number(&decoder->protected_->ogg_decoder_aspect, value);
 	return true;
+#else
+	(void)value;
+	return false;
+#endif
 }
 
-FLAC_API FLAC__bool FLAC__stream_decoder_set_write_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderWriteCallback value)
+FLAC_API FLAC__bool FLAC__stream_decoder_set_md5_checking(FLAC__StreamDecoder *decoder, FLAC__bool value)
 {
 	FLAC__ASSERT(0 != decoder);
-	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
 		return false;
-	decoder->private_->write_callback = value;
-	return true;
-}
-
-FLAC_API FLAC__bool FLAC__stream_decoder_set_metadata_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderMetadataCallback value)
-{
-	FLAC__ASSERT(0 != decoder);
-	FLAC__ASSERT(0 != decoder->private_);
-	FLAC__ASSERT(0 != decoder->protected_);
-	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return false;
-	decoder->private_->metadata_callback = value;
-	return true;
-}
-
-FLAC_API FLAC__bool FLAC__stream_decoder_set_error_callback(FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorCallback value)
-{
-	FLAC__ASSERT(0 != decoder);
-	FLAC__ASSERT(0 != decoder->private_);
-	FLAC__ASSERT(0 != decoder->protected_);
-	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return false;
-	decoder->private_->error_callback = value;
-	return true;
-}
-
-FLAC_API FLAC__bool FLAC__stream_decoder_set_client_data(FLAC__StreamDecoder *decoder, void *value)
-{
-	FLAC__ASSERT(0 != decoder);
-	FLAC__ASSERT(0 != decoder->private_);
-	FLAC__ASSERT(0 != decoder->protected_);
-	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
-		return false;
-	decoder->private_->client_data = value;
+	decoder->protected_->md5_checking = value;
 	return true;
 }
 
@@ -551,6 +882,20 @@ FLAC_API const char *FLAC__stream_decoder_get_resolved_state_string(const FLAC__
 	return FLAC__StreamDecoderStateString[decoder->protected_->state];
 }
 
+FLAC_API FLAC__bool FLAC__stream_decoder_get_md5_checking(const FLAC__StreamDecoder *decoder)
+{ /* returns the md5_checking flag as last set by FLAC__stream_decoder_set_md5_checking() or cleared by set_defaults_() */
+	FLAC__ASSERT(0 != decoder);
+	FLAC__ASSERT(0 != decoder->protected_);
+	return decoder->protected_->md5_checking;
+}
+
+FLAC_API FLAC__uint64 FLAC__stream_decoder_get_total_samples(const FLAC__StreamDecoder *decoder)
+{ /* returns total_samples from the stored STREAMINFO, or 0 while has_stream_info is false */
+	FLAC__ASSERT(0 != decoder);
+	FLAC__ASSERT(0 != decoder->private_); /* private_ (not protected_) is the pointer dereferenced below */
+	return decoder->private_->has_stream_info? decoder->private_->stream_info.data.stream_info.total_samples : 0;
+}
+
 FLAC_API unsigned FLAC__stream_decoder_get_channels(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
@@ -586,18 +931,46 @@ FLAC_API unsigned FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder *
 	return decoder->protected_->blocksize;
 }
 
+FLAC_API FLAC__bool FLAC__stream_decoder_get_decode_position(const FLAC__StreamDecoder *decoder, FLAC__uint64 *position)
+{ /* stores in *position the tell_callback position less the bytes still unconsumed in the bitreader; returns false on any failure */
+	FLAC__ASSERT(0 != decoder);
+	FLAC__ASSERT(0 != decoder->private_);
+	FLAC__ASSERT(0 != position);
+
+#if FLAC__HAS_OGG
+	if(decoder->private_->is_ogg)
+		return false; /* never succeeds for Ogg FLAC input */
+#endif
+	if(0 == decoder->private_->tell_callback)
+		return false; /* no tell callback was supplied at init time */
+	if(decoder->private_->tell_callback(decoder, position, decoder->private_->client_data) != FLAC__STREAM_DECODER_TELL_STATUS_OK)
+		return false; /* covers both TELL_STATUS_ERROR and TELL_STATUS_UNSUPPORTED */
+	/* should never happen since all FLAC frames and metadata blocks are byte aligned, but check just in case */
+	if(!FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input))
+		return false;
+	FLAC__ASSERT(*position >= FLAC__stream_decoder_get_input_bytes_unconsumed(decoder));
+	*position -= FLAC__stream_decoder_get_input_bytes_unconsumed(decoder); /* back up over bytes the bitreader holds but has not consumed */
+	return true;
+}
+
 FLAC_API FLAC__bool FLAC__stream_decoder_flush(FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
 
-	if(!FLAC__bitbuffer_clear(decoder->private_->input)) {
+	decoder->private_->samples_decoded = 0;
+	decoder->private_->do_md5_checking = false;
+
+#if FLAC__HAS_OGG
+	if(decoder->private_->is_ogg)
+		FLAC__ogg_decoder_aspect_flush(&decoder->protected_->ogg_decoder_aspect);
+#endif
+
+	if(!FLAC__bitreader_clear(decoder->private_->input)) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
-	decoder->private_->last_frame_number = 0;
-	decoder->private_->last_block_size = 0;
 	decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 
 	return true;
@@ -610,12 +983,55 @@ FLAC_API FLAC__bool FLAC__stream_decoder_reset(FLAC__StreamDecoder *decoder)
 	FLAC__ASSERT(0 != decoder->protected_);
 
 	if(!FLAC__stream_decoder_flush(decoder)) {
-		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		/* above call sets the state for us */
 		return false;
 	}
+
+#if FLAC__HAS_OGG
+	/*@@@ could go in !internal_reset_hack block below */
+	if(decoder->private_->is_ogg)
+		FLAC__ogg_decoder_aspect_reset(&decoder->protected_->ogg_decoder_aspect);
+#endif
+
+	/* Rewind if necessary.  If init_stream_internal_() is calling us
+	 * (internal_reset_hack), don't try to rewind since we are already at
+	 * the beginning of the stream and don't want to fail if the input is
+	 * not seekable.
+	 */
+	if(!decoder->private_->internal_reset_hack) {
+		if(decoder->private_->file == stdin)
+			return false; /* can't rewind stdin, reset fails */
+		if(decoder->private_->seek_callback && decoder->private_->seek_callback(decoder, 0, decoder->private_->client_data) == FLAC__STREAM_DECODER_SEEK_STATUS_ERROR)
+			return false; /* seekable and seek fails, reset fails */
+	}
+	else
+		decoder->private_->internal_reset_hack = false;
+
 	decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_METADATA;
 
-	decoder->private_->samples_decoded = 0;
+	decoder->private_->has_stream_info = false;
+	if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) {
+		free(decoder->private_->seek_table.data.seek_table.points);
+		decoder->private_->seek_table.data.seek_table.points = 0;
+		decoder->private_->has_seek_table = false;
+	}
+	decoder->private_->do_md5_checking = decoder->protected_->md5_checking;
+	/*
+	 * This goes in reset() and not flush() because according to the spec, a
+	 * fixed-blocksize stream must stay that way through the whole stream.
+	 */
+	decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size = 0;
+
+	/* We initialize the FLAC__MD5Context even though we may never use it.  This
+	 * is because md5 checking may be turned on to start and then turned off if
+	 * a seek occurs.  So we init the context here and finalize it in
+	 * FLAC__stream_decoder_finish() to make sure things are always cleaned up
+	 * properly.
+	 */
+	FLAC__MD5Init(&decoder->private_->md5context);
+
+	decoder->private_->first_frame_offset = 0;
+	decoder->private_->unparseable_frame_count = 0;
 
 	return true;
 }
@@ -749,6 +1165,73 @@ FLAC_API FLAC__bool FLAC__stream_decoder_skip_single_frame(FLAC__StreamDecoder *
 	}
 }
 
+FLAC_API FLAC__bool FLAC__stream_decoder_seek_absolute(FLAC__StreamDecoder *decoder, FLAC__uint64 sample)
+{ /* seeks to absolute sample number 'sample'; false if the state or callbacks do not allow it, the target is past a known total, or the seek fails */
+	FLAC__uint64 length; /* filled in by length_callback below and passed on to the seek routine */
+
+	FLAC__ASSERT(0 != decoder);
+
+	if(
+		decoder->protected_->state != FLAC__STREAM_DECODER_SEARCH_FOR_METADATA &&
+		decoder->protected_->state != FLAC__STREAM_DECODER_READ_METADATA &&
+		decoder->protected_->state != FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC &&
+		decoder->protected_->state != FLAC__STREAM_DECODER_READ_FRAME &&
+		decoder->protected_->state != FLAC__STREAM_DECODER_END_OF_STREAM
+	)
+		return false; /* seeking is only attempted from the five states listed above */
+
+	if(0 == decoder->private_->seek_callback)
+		return false; /* no seek callback was supplied at init time */
+
+	FLAC__ASSERT(decoder->private_->seek_callback);
+	FLAC__ASSERT(decoder->private_->tell_callback); /* init_stream_internal_() rejects a seek callback without tell/length/eof */
+	FLAC__ASSERT(decoder->private_->length_callback);
+	FLAC__ASSERT(decoder->private_->eof_callback);
+
+	if(FLAC__stream_decoder_get_total_samples(decoder) > 0 && sample >= FLAC__stream_decoder_get_total_samples(decoder))
+		return false; /* a total of 0 skips this range check */
+
+	decoder->private_->is_seeking = true;
+
+	/* turn off md5 checking if a seek is attempted */
+	decoder->private_->do_md5_checking = false;
+
+	/* get the file length (currently our algorithm needs to know the length so it's also an error to get FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED) */
+	if(decoder->private_->length_callback(decoder, &length, decoder->private_->client_data) != FLAC__STREAM_DECODER_LENGTH_STATUS_OK) {
+		decoder->private_->is_seeking = false;
+		return false;
+	}
+
+	/* if we haven't finished processing the metadata yet, do that so we have the STREAMINFO, SEEK_TABLE, and first_frame_offset */
+	if(
+		decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_METADATA ||
+		decoder->protected_->state == FLAC__STREAM_DECODER_READ_METADATA
+	) {
+		if(!FLAC__stream_decoder_process_until_end_of_metadata(decoder)) {
+			/* above call sets the state for us */
+			decoder->private_->is_seeking = false;
+			return false;
+		}
+		/* check this again in case we didn't know total_samples the first time */
+		if(FLAC__stream_decoder_get_total_samples(decoder) > 0 && sample >= FLAC__stream_decoder_get_total_samples(decoder)) {
+			decoder->private_->is_seeking = false;
+			return false;
+		}
+	}
+
+	{
+		const FLAC__bool ok =
+#if FLAC__HAS_OGG
+			decoder->private_->is_ogg?
+			seek_to_absolute_sample_ogg_(decoder, length, sample) :
+#endif
+			seek_to_absolute_sample_(decoder, length, sample)
+		;
+		decoder->private_->is_seeking = false; /* cleared on every exit path once it has been set */
+		return ok;
+	}
+}
+
 /***********************************************************************
  *
  * Protected class methods
@@ -758,7 +1241,9 @@ FLAC_API FLAC__bool FLAC__stream_decoder_skip_single_frame(FLAC__StreamDecoder *
 unsigned FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
-	return FLAC__bitbuffer_get_input_bytes_unconsumed(decoder->private_->input);
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(!(FLAC__bitreader_get_input_bits_unconsumed(decoder->private_->input) & 7));
+	return FLAC__bitreader_get_input_bits_unconsumed(decoder->private_->input) / 8;
 }
 
 /***********************************************************************
@@ -769,7 +1254,14 @@ unsigned FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecod
 
 void set_defaults_(FLAC__StreamDecoder *decoder)
 {
+#if FLAC__HAS_OGG
+	decoder->private_->is_ogg = false;
+#endif
 	decoder->private_->read_callback = 0;
+	decoder->private_->seek_callback = 0;
+	decoder->private_->tell_callback = 0;
+	decoder->private_->length_callback = 0;
+	decoder->private_->eof_callback = 0;
 	decoder->private_->write_callback = 0;
 	decoder->private_->metadata_callback = 0;
 	decoder->private_->error_callback = 0;
@@ -778,6 +1270,33 @@ void set_defaults_(FLAC__StreamDecoder *decoder)
 	memset(decoder->private_->metadata_filter, 0, sizeof(decoder->private_->metadata_filter));
 	decoder->private_->metadata_filter[FLAC__METADATA_TYPE_STREAMINFO] = true;
 	decoder->private_->metadata_filter_ids_count = 0;
+
+	decoder->protected_->md5_checking = false;
+
+#if FLAC__HAS_OGG
+	FLAC__ogg_decoder_aspect_set_defaults(&decoder->protected_->ogg_decoder_aspect);
+#endif
+}
+
+/*
+ * Forcibly sets stdin to binary mode (on the platforms handled below) and returns stdin
+ */
+FILE *get_binary_stdin_(void)
+{
+	/* if something breaks here it is probably due to the presence or
+	 * absence of an underscore before the identifiers 'setmode',
+	 * 'fileno', and/or 'O_BINARY'; check your system header files.
+	 */
+#if defined _MSC_VER || defined __MINGW32__
+	_setmode(_fileno(stdin), _O_BINARY);
+#elif defined __CYGWIN__ 
+	/* almost certainly not needed for any modern Cygwin, but let's be safe... */
+	setmode(_fileno(stdin), _O_BINARY);
+#elif defined __EMX__
+	setmode(fileno(stdin), O_BINARY);
+#endif
+
+	return stdin; /* on all other platforms nothing is changed and stdin is returned as-is */
+}
 
 FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigned channels)
@@ -808,7 +1327,7 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
 		 * (at negative indices) for alignment purposes; we use 4
 		 * to keep the data well-aligned.
 		 */
-		tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*(size+4));
+		tmp = (FLAC__int32*)safe_malloc_muladd2_(sizeof(FLAC__int32), /*times (*/size, /*+*/4/*)*/);
 		if(tmp == 0) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
@@ -833,7 +1352,7 @@ FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigne
 
 FLAC__bool has_id_filtered_(FLAC__StreamDecoder *decoder, FLAC__byte *id)
 {
-	unsigned i;
+	size_t i;
 
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
@@ -851,7 +1370,7 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 	unsigned i, id;
 	FLAC__bool first = true;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	for(i = id = 0; i < 4; ) {
 		if(decoder->private_->cached) {
@@ -859,8 +1378,8 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 			decoder->private_->cached = false;
 		}
 		else {
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+				return false; /* read_callback_ sets the state for us */
 		}
 		if(x == FLAC__STREAM_SYNC_STRING[i]) {
 			first = true;
@@ -873,14 +1392,15 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 			i = 0;
 			if(id == 3) {
 				if(!skip_id3v2_tag_(decoder))
-					return false; /* the read_callback_ sets the state for us */
+					return false; /* skip_id3v2_tag_ sets the state for us */
 			}
 			continue;
 		}
+		id = 0;
 		if(x == 0xff) { /* MAGIC NUMBER for the first 8 frame sync bits */
 			decoder->private_->header_warmup[0] = (FLAC__byte)x;
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+				return false; /* read_callback_ sets the state for us */
 
 			/* we have to check if we just read two 0xff's in a row; the second may actually be the beginning of the sync code */
 			/* else we have to check if the second byte is the end of a sync code */
@@ -896,7 +1416,7 @@ FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 		}
 		i = 0;
 		if(first) {
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			first = false;
 		}
 	}
@@ -910,24 +1430,26 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 	FLAC__bool is_last;
 	FLAC__uint32 i, x, type, length;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_IS_LAST_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_IS_LAST_LEN))
+		return false; /* read_callback_ sets the state for us */
 	is_last = x? true : false;
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &type, FLAC__STREAM_METADATA_TYPE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &type, FLAC__STREAM_METADATA_TYPE_LEN))
+		return false; /* read_callback_ sets the state for us */
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &length, FLAC__STREAM_METADATA_LENGTH_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &length, FLAC__STREAM_METADATA_LENGTH_LEN))
+		return false; /* read_callback_ sets the state for us */
 
 	if(type == FLAC__METADATA_TYPE_STREAMINFO) {
 		if(!read_metadata_streaminfo_(decoder, is_last, length))
 			return false;
 
 		decoder->private_->has_stream_info = true;
-		if(decoder->private_->metadata_filter[FLAC__METADATA_TYPE_STREAMINFO])
+		if(0 == memcmp(decoder->private_->stream_info.data.stream_info.md5sum, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16))
+			decoder->private_->do_md5_checking = false;
+		if(!decoder->private_->is_seeking && decoder->private_->metadata_filter[FLAC__METADATA_TYPE_STREAMINFO] && decoder->private_->metadata_callback)
 			decoder->private_->metadata_callback(decoder, &decoder->private_->stream_info, decoder->private_->client_data);
 	}
 	else if(type == FLAC__METADATA_TYPE_SEEKTABLE) {
@@ -935,7 +1457,7 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 			return false;
 
 		decoder->private_->has_seek_table = true;
-		if(decoder->private_->metadata_filter[FLAC__METADATA_TYPE_SEEKTABLE])
+		if(!decoder->private_->is_seeking && decoder->private_->metadata_filter[FLAC__METADATA_TYPE_SEEKTABLE] && decoder->private_->metadata_callback)
 			decoder->private_->metadata_callback(decoder, &decoder->private_->seek_table, decoder->private_->client_data);
 	}
 	else {
@@ -948,8 +1470,13 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 		block.length = length;
 
 		if(type == FLAC__METADATA_TYPE_APPLICATION) {
-			if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.id, FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.id, FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8))
+				return false; /* read_callback_ sets the state for us */
+
+			if(real_length < FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8) { /* underflow check */
+				decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;/*@@@@@@ maybe wrong error? need to resync?*/
+				return false;
+			}
 
 			real_length -= FLAC__STREAM_METADATA_APPLICATION_ID_LEN/8;
 
@@ -958,15 +1485,15 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 		}
 
 		if(skip_it) {
-			if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, 0, real_length, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, real_length))
+				return false; /* read_callback_ sets the state for us */
 		}
 		else {
 			switch(type) {
 				case FLAC__METADATA_TYPE_PADDING:
 					/* skip the padding bytes */
-					if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, 0, real_length, read_callback_, decoder))
-						return false; /* the read_callback_ sets the state for us */
+					if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, real_length))
+						return false; /* read_callback_ sets the state for us */
 					break;
 				case FLAC__METADATA_TYPE_APPLICATION:
 					/* remember, we read the ID already */
@@ -975,8 +1502,8 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 							decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 							return false;
 						}
-						if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.data, real_length, read_callback_, decoder))
-							return false; /* the read_callback_ sets the state for us */
+						if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.application.data, real_length))
+							return false; /* read_callback_ sets the state for us */
 					}
 					else
 						block.data.application.data = 0;
@@ -989,6 +1516,10 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 					if(!read_metadata_cuesheet_(decoder, &block.data.cue_sheet))
 						return false;
 					break;
+				case FLAC__METADATA_TYPE_PICTURE:
+					if(!read_metadata_picture_(decoder, &block.data.picture))
+						return false;
+					break;
 				case FLAC__METADATA_TYPE_STREAMINFO:
 				case FLAC__METADATA_TYPE_SEEKTABLE:
 					FLAC__ASSERT(0);
@@ -999,16 +1530,17 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 							decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 							return false;
 						}
-						if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.unknown.data, real_length, read_callback_, decoder))
-							return false; /* the read_callback_ sets the state for us */
+						if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, block.data.unknown.data, real_length))
+							return false; /* read_callback_ sets the state for us */
 					}
 					else
 						block.data.unknown.data = 0;
 					break;
 			}
-			decoder->private_->metadata_callback(decoder, &block, decoder->private_->client_data);
+			if(!decoder->private_->is_seeking && decoder->private_->metadata_callback)
+				decoder->private_->metadata_callback(decoder, &block, decoder->private_->client_data);
 
-			/* now we have to free any malloc'ed data in the block */
+			/* now we have to free any malloc()ed data in the block */
 			switch(type) {
 				case FLAC__METADATA_TYPE_PADDING:
 					break;
@@ -1034,6 +1566,14 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 					if(0 != block.data.cue_sheet.tracks)
 						free(block.data.cue_sheet.tracks);
 					break;
+				case FLAC__METADATA_TYPE_PICTURE:
+					if(0 != block.data.picture.mime_type)
+						free(block.data.picture.mime_type);
+					if(0 != block.data.picture.description)
+						free(block.data.picture.description);
+					if(0 != block.data.picture.data)
+						free(block.data.picture.data);
+					break;
 				case FLAC__METADATA_TYPE_STREAMINFO:
 				case FLAC__METADATA_TYPE_SEEKTABLE:
 					FLAC__ASSERT(0);
@@ -1045,8 +1585,12 @@ FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder)
 		}
 	}
 
-	if(is_last)
+	if(is_last) {
+		/* if this fails, it's OK, it's just a hint for the seek routine */
+		if(!FLAC__stream_decoder_get_decode_position(decoder, &decoder->private_->first_frame_offset))
+			decoder->private_->first_frame_offset = 0;
 		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+	}
 
 	return true;
 }
@@ -1056,68 +1600,68 @@ FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is
 	FLAC__uint32 x;
 	unsigned bits, used_bits = 0;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	decoder->private_->stream_info.type = FLAC__METADATA_TYPE_STREAMINFO;
 	decoder->private_->stream_info.is_last = is_last;
 	decoder->private_->stream_info.length = length;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, bits, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, bits))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.min_blocksize = x;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.max_blocksize = x;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.min_framesize = x;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.max_framesize = x;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.sample_rate = x;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.channels = x+1;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	decoder->private_->stream_info.data.stream_info.bits_per_sample = x+1;
 	used_bits += bits;
 
 	bits = FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN;
-	if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &decoder->private_->stream_info.data.stream_info.total_samples, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &decoder->private_->stream_info.data.stream_info.total_samples, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN))
+		return false; /* read_callback_ sets the state for us */
 	used_bits += bits;
 
-	if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, decoder->private_->stream_info.data.stream_info.md5sum, 16, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, decoder->private_->stream_info.data.stream_info.md5sum, 16))
+		return false; /* read_callback_ sets the state for us */
 	used_bits += 16*8;
 
 	/* skip the rest of the block */
 	FLAC__ASSERT(used_bits % 8 == 0);
 	length -= (used_bits / 8);
-	if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, 0, length, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, length))
+		return false; /* read_callback_ sets the state for us */
 
 	return true;
 }
@@ -1127,7 +1671,7 @@ FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_
 	FLAC__uint32 i, x;
 	FLAC__uint64 xx;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	decoder->private_->seek_table.type = FLAC__METADATA_TYPE_SEEKTABLE;
 	decoder->private_->seek_table.is_last = is_last;
@@ -1136,29 +1680,29 @@ FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_
 	decoder->private_->seek_table.data.seek_table.num_points = length / FLAC__STREAM_METADATA_SEEKPOINT_LENGTH;
 
 	/* use realloc since we may pass through here several times (e.g. after seeking) */
-	if(0 == (decoder->private_->seek_table.data.seek_table.points = (FLAC__StreamMetadata_SeekPoint*)realloc(decoder->private_->seek_table.data.seek_table.points, decoder->private_->seek_table.data.seek_table.num_points * sizeof(FLAC__StreamMetadata_SeekPoint)))) {
+	if(0 == (decoder->private_->seek_table.data.seek_table.points = (FLAC__StreamMetadata_SeekPoint*)safe_realloc_mul_2op_(decoder->private_->seek_table.data.seek_table.points, decoder->private_->seek_table.data.seek_table.num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint)))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
 	}
 	for(i = 0; i < decoder->private_->seek_table.data.seek_table.num_points; i++) {
-		if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &xx, FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &xx, FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN))
+			return false; /* read_callback_ sets the state for us */
 		decoder->private_->seek_table.data.seek_table.points[i].sample_number = xx;
 
-		if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &xx, FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &xx, FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN))
+			return false; /* read_callback_ sets the state for us */
 		decoder->private_->seek_table.data.seek_table.points[i].stream_offset = xx;
 
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN))
+			return false; /* read_callback_ sets the state for us */
 		decoder->private_->seek_table.data.seek_table.points[i].frame_samples = x;
 	}
 	length -= (decoder->private_->seek_table.data.seek_table.num_points * FLAC__STREAM_METADATA_SEEKPOINT_LENGTH);
 	/* if there is a partial point left, skip over it */
 	if(length > 0) {
-		/*@@@ do an error_callback() here?  there's an argument for either way */
-		if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, 0, length, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		/*@@@ do a send_error_to_client_() here?  there's an argument for either way */
+		if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, length))
+			return false; /* read_callback_ sets the state for us */
 	}
 
 	return true;
@@ -1168,19 +1712,19 @@ FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__Stre
 {
 	FLAC__uint32 i;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	/* read vendor string */
 	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
-	if(!FLAC__bitbuffer_read_raw_uint32_little_endian(decoder->private_->input, &obj->vendor_string.length, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->vendor_string.length))
+		return false; /* read_callback_ sets the state for us */
 	if(obj->vendor_string.length > 0) {
-		if(0 == (obj->vendor_string.entry = (FLAC__byte*)malloc(obj->vendor_string.length+1))) {
+		if(0 == (obj->vendor_string.entry = (FLAC__byte*)safe_malloc_add_2op_(obj->vendor_string.length, /*+*/1))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
-		if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, obj->vendor_string.entry, obj->vendor_string.length, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->vendor_string.entry, obj->vendor_string.length))
+			return false; /* read_callback_ sets the state for us */
 		obj->vendor_string.entry[obj->vendor_string.length] = '\0';
 	}
 	else
@@ -1188,26 +1732,26 @@ FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__Stre
 
 	/* read num comments */
 	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN == 32);
-	if(!FLAC__bitbuffer_read_raw_uint32_little_endian(decoder->private_->input, &obj->num_comments, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->num_comments))
+		return false; /* read_callback_ sets the state for us */
 
 	/* read comments */
 	if(obj->num_comments > 0) {
-		if(0 == (obj->comments = (FLAC__StreamMetadata_VorbisComment_Entry*)malloc(obj->num_comments * sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
+		if(0 == (obj->comments = (FLAC__StreamMetadata_VorbisComment_Entry*)safe_malloc_mul_2op_(obj->num_comments, /*times*/sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
 		for(i = 0; i < obj->num_comments; i++) {
 			FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
-			if(!FLAC__bitbuffer_read_raw_uint32_little_endian(decoder->private_->input, &obj->comments[i].length, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->comments[i].length))
+				return false; /* read_callback_ sets the state for us */
 			if(obj->comments[i].length > 0) {
-				if(0 == (obj->comments[i].entry = (FLAC__byte*)malloc(obj->comments[i].length+1))) {
+				if(0 == (obj->comments[i].entry = (FLAC__byte*)safe_malloc_add_2op_(obj->comments[i].length, /*+*/1))) {
 					decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 					return false;
 				}
-				if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length, read_callback_, decoder))
-					return false; /* the read_callback_ sets the state for us */
+				if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length))
+					return false; /* read_callback_ sets the state for us */
 				obj->comments[i].entry[obj->comments[i].length] = '\0';
 			}
 			else
@@ -1225,77 +1769,77 @@ FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMet
 {
 	FLAC__uint32 i, j, x;
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	memset(obj, 0, sizeof(FLAC__StreamMetadata_CueSheet));
 
 	FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN % 8 == 0);
-	if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, (FLAC__byte*)obj->media_catalog_number, FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN/8, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, (FLAC__byte*)obj->media_catalog_number, FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN/8))
+		return false; /* read_callback_ sets the state for us */
 
-	if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &obj->lead_in, FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &obj->lead_in, FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN))
+		return false; /* read_callback_ sets the state for us */
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN))
+		return false; /* read_callback_ sets the state for us */
 	obj->is_cd = x? true : false;
 
-	if(!FLAC__bitbuffer_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN))
+		return false; /* read_callback_ sets the state for us */
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN))
+		return false; /* read_callback_ sets the state for us */
 	obj->num_tracks = x;
 
 	if(obj->num_tracks > 0) {
-		if(0 == (obj->tracks = (FLAC__StreamMetadata_CueSheet_Track*)calloc(obj->num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)))) {
+		if(0 == (obj->tracks = (FLAC__StreamMetadata_CueSheet_Track*)safe_calloc_(obj->num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track)))) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 			return false;
 		}
 		for(i = 0; i < obj->num_tracks; i++) {
 			FLAC__StreamMetadata_CueSheet_Track *track = &obj->tracks[i];
-			if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &track->offset, FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &track->offset, FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN))
+				return false; /* read_callback_ sets the state for us */
 
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN))
+				return false; /* read_callback_ sets the state for us */
 			track->number = (FLAC__byte)x;
 
 			FLAC__ASSERT(FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN % 8 == 0);
-			if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, (FLAC__byte*)track->isrc, FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN/8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, (FLAC__byte*)track->isrc, FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN/8))
+				return false; /* read_callback_ sets the state for us */
 
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN))
+				return false; /* read_callback_ sets the state for us */
 			track->type = x;
 
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN))
+				return false; /* read_callback_ sets the state for us */
 			track->pre_emphasis = x;
 
-			if(!FLAC__bitbuffer_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN))
+				return false; /* read_callback_ sets the state for us */
 
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN))
+				return false; /* read_callback_ sets the state for us */
 			track->num_indices = (FLAC__byte)x;
 
 			if(track->num_indices > 0) {
-				if(0 == (track->indices = (FLAC__StreamMetadata_CueSheet_Index*)calloc(track->num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)))) {
+				if(0 == (track->indices = (FLAC__StreamMetadata_CueSheet_Index*)safe_calloc_(track->num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index)))) {
 					decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 					return false;
 				}
 				for(j = 0; j < track->num_indices; j++) {
 					FLAC__StreamMetadata_CueSheet_Index *index = &track->indices[j];
-					if(!FLAC__bitbuffer_read_raw_uint64(decoder->private_->input, &index->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN, read_callback_, decoder))
-						return false; /* the read_callback_ sets the state for us */
+					if(!FLAC__bitreader_read_raw_uint64(decoder->private_->input, &index->offset, FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN))
+						return false; /* read_callback_ sets the state for us */
 
-					if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN, read_callback_, decoder))
-						return false; /* the read_callback_ sets the state for us */
+					if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN))
+						return false; /* read_callback_ sets the state for us */
 					index->number = (FLAC__byte)x;
 
-					if(!FLAC__bitbuffer_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN, read_callback_, decoder))
-						return false; /* the read_callback_ sets the state for us */
+					if(!FLAC__bitreader_skip_bits_no_crc(decoder->private_->input, FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN))
+						return false; /* read_callback_ sets the state for us */
 				}
 			}
 		}
@@ -1304,25 +1848,93 @@ FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMet
 	return true;
 }
 
+FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_Picture *obj)
+{
+	FLAC__uint32 x; /* scratch: the picture type first, then each string's byte count */
+	/* Fills *obj field by field from decoder->private_->input.  Returns false as soon as a read or an allocation fails, true once the picture data has been read. */
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
+
+	/* read type (FLAC__STREAM_METADATA_PICTURE_TYPE_LEN bits, copied into obj->type) */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_TYPE_LEN))
+		return false; /* read_callback_ sets the state for us */
+	obj->type = x;
+
+	/* read MIME type: a byte count, then that many bytes, stored with a trailing '\0' */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN))
+		return false; /* read_callback_ sets the state for us */
+	if(0 == (obj->mime_type = (char*)safe_malloc_add_2op_(x, /*+*/1))) { /* room for x bytes plus the terminator written below */
+		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		return false;
+	}
+	if(x > 0) { /* an empty string has no bytes to read */
+		if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, (FLAC__byte*)obj->mime_type, x))
+			return false; /* read_callback_ sets the state for us; NOTE(review): from here on a failed return leaves the buffers already stored in *obj allocated -- confirm the caller frees them */
+	}
+	obj->mime_type[x] = '\0';
+
+	/* read description: same layout as the MIME type (byte count, bytes, trailing '\0') */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN))
+		return false; /* read_callback_ sets the state for us */
+	if(0 == (obj->description = (FLAC__byte*)safe_malloc_add_2op_(x, /*+*/1))) { /* room for x bytes plus the terminator written below */
+		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		return false;
+	}
+	if(x > 0) { /* an empty string has no bytes to read */
+		if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->description, x))
+			return false; /* read_callback_ sets the state for us */
+	}
+	obj->description[x] = '\0';
+
+	/* read width (stored directly into obj, no scratch variable) */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &obj->width, FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN))
+		return false; /* read_callback_ sets the state for us */
+
+	/* read height (stored directly into obj) */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &obj->height, FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN))
+		return false; /* read_callback_ sets the state for us */
+
+	/* read depth (stored directly into obj) */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &obj->depth, FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN))
+		return false; /* read_callback_ sets the state for us */
+
+	/* read colors (stored directly into obj) */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &obj->colors, FLAC__STREAM_METADATA_PICTURE_COLORS_LEN))
+		return false; /* read_callback_ sets the state for us */
+
+	/* read data: a byte count kept in obj->data_length, then that many raw bytes */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &(obj->data_length), FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN))
+		return false; /* read_callback_ sets the state for us */
+	if(0 == (obj->data = (FLAC__byte*)safe_malloc_(obj->data_length))) { /* exactly data_length bytes, no terminator; NOTE(review): a zero data_length relies on safe_malloc_ returning non-null for size 0 -- confirm */
+		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+		return false;
+	}
+	if(obj->data_length > 0) { /* empty picture data has no bytes to read */
+		if(!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->data, obj->data_length))
+			return false; /* read_callback_ sets the state for us */
+	}
+
+	return true;
+}
+
 FLAC__bool skip_id3v2_tag_(FLAC__StreamDecoder *decoder)
 {
 	FLAC__uint32 x;
 	unsigned i, skip;
 
 	/* skip the version and flags bytes */
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 24, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 24)) /* the 24 bits are consumed only; x is overwritten in the loop below */
+		return false; /* read_callback_ sets the state for us */
 	/* get the size (in bytes) to skip */
 	skip = 0;
 	for(i = 0; i < 4; i++) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8)) /* one size byte per pass; only its low 7 bits are kept below */
+			return false; /* read_callback_ sets the state for us */
 		skip <<= 7;
 		skip |= (x & 0x7f);
 	}
 	/* skip the rest of the tag */
-	if(!FLAC__bitbuffer_read_byte_block_aligned_no_crc(decoder->private_->input, 0, skip, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, skip)) /* skip = the 4 x 7 = 28-bit byte count assembled above */
+		return false; /* read_callback_ sets the state for us */
 	return true;
 }
 
@@ -1333,17 +1945,17 @@ FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder)
 
 	/* If we know the total number of samples in the stream, stop if we've read that many. */
 	/* This will stop us, for example, from wasting time trying to sync on an ID3V1 tag. */
-	if(decoder->private_->has_stream_info && decoder->private_->stream_info.data.stream_info.total_samples) {
-		if(decoder->private_->samples_decoded >= decoder->private_->stream_info.data.stream_info.total_samples) {
+	if(FLAC__stream_decoder_get_total_samples(decoder) > 0) {
+		if(decoder->private_->samples_decoded >= FLAC__stream_decoder_get_total_samples(decoder)) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_END_OF_STREAM;
 			return true;
 		}
 	}
 
 	/* make sure we're byte aligned */
-	if(!FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input)) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__bitbuffer_bits_left_for_byte_alignment(decoder->private_->input), read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input)) {
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__bitreader_bits_left_for_byte_alignment(decoder->private_->input)))
+			return false; /* read_callback_ sets the state for us */
 	}
 
 	while(1) {
@@ -1352,13 +1964,13 @@ FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder)
 			decoder->private_->cached = false;
 		}
 		else {
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+				return false; /* read_callback_ sets the state for us */
 		}
 		if(x == 0xff) { /* MAGIC NUMBER for the first 8 frame sync bits */
 			decoder->private_->header_warmup[0] = (FLAC__byte)x;
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+				return false; /* read_callback_ sets the state for us */
 
 			/* we have to check if we just read two 0xff's in a row; the second may actually be the beginning of the sync code */
 			/* else we have to check if the second byte is the end of a sync code */
@@ -1373,7 +1985,7 @@ FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder)
 			}
 		}
 		if(first) {
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			first = false;
 		}
 	}
@@ -1385,21 +1997,21 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 {
 	unsigned channel;
 	unsigned i;
-	FLAC__int32 mid, side, left, right;
-	FLAC__uint16 frame_crc; /* the one we calculate from the input stream */
+	FLAC__int32 mid, side;
+	unsigned frame_crc; /* the one we calculate from the input stream */
 	FLAC__uint32 x;
 
 	*got_a_frame = false;
 
 	/* init the CRC */
 	frame_crc = 0;
-	FLAC__CRC16_UPDATE(decoder->private_->header_warmup[0], frame_crc);
-	FLAC__CRC16_UPDATE(decoder->private_->header_warmup[1], frame_crc);
-	FLAC__bitbuffer_reset_read_crc16(decoder->private_->input, frame_crc);
+	frame_crc = FLAC__CRC16_UPDATE(decoder->private_->header_warmup[0], frame_crc);
+	frame_crc = FLAC__CRC16_UPDATE(decoder->private_->header_warmup[1], frame_crc);
+	FLAC__bitreader_reset_read_crc16(decoder->private_->input, (FLAC__uint16)frame_crc);
 
 	if(!read_frame_header_(decoder))
 		return false;
-	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC)
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means we didn't sync on a valid header */
 		return true;
 	if(!allocate_output_(decoder, decoder->private_->frame.header.blocksize, decoder->private_->frame.header.channels))
 		return false;
@@ -1435,21 +2047,21 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 		 */
 		if(!read_subframe_(decoder, channel, bps, do_full_decode))
 			return false;
-		if(decoder->protected_->state != FLAC__STREAM_DECODER_READ_FRAME) {
-			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
 			return true;
-		}
 	}
 	if(!read_zero_padding_(decoder))
 		return false;
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption (i.e. "zero bits" were not all zeroes) */
+		return true;
 
 	/*
 	 * Read the frame CRC-16 from the footer and check
 	 */
-	frame_crc = FLAC__bitbuffer_get_read_crc16(decoder->private_->input);
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, FLAC__FRAME_FOOTER_CRC_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
-	if(frame_crc == (FLAC__uint16)x) {
+	frame_crc = FLAC__bitreader_get_read_crc16(decoder->private_->input);
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__FRAME_FOOTER_CRC_LEN))
+		return false; /* read_callback_ sets the state for us */
+	if(frame_crc == x) {
 		if(do_full_decode) {
 			/* Undo any special channel coding */
 			switch(decoder->private_->frame.header.channel_assignment) {
@@ -1469,15 +2081,19 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 				case FLAC__CHANNEL_ASSIGNMENT_MID_SIDE:
 					FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
 					for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+#if 1
 						mid = decoder->private_->output[0][i];
 						side = decoder->private_->output[1][i];
 						mid <<= 1;
-						if(side & 1) /* i.e. if 'side' is odd... */
-							mid++;
-						left = mid + side;
-						right = mid - side;
-						decoder->private_->output[0][i] = left >> 1;
-						decoder->private_->output[1][i] = right >> 1;
+						mid |= (side & 1); /* i.e. if 'side' is odd... */
+						decoder->private_->output[0][i] = (mid + side) >> 1;
+						decoder->private_->output[1][i] = (mid - side) >> 1;
+#else
+						/* OPT: without 'side' temp variable */
+						mid = (decoder->private_->output[0][i] << 1) | (decoder->private_->output[1][i] & 1); /* i.e. if 'side' is odd... */
+						decoder->private_->output[0][i] = (mid + decoder->private_->output[1][i]) >> 1;
+						decoder->private_->output[1][i] = (mid - decoder->private_->output[1][i]) >> 1;
+#endif
 					}
 					break;
 				default:
@@ -1488,7 +2104,7 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 	}
 	else {
 		/* Bad frame, emit error and zero the output signal */
-		decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH, decoder->private_->client_data);
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH);
 		if(do_full_decode) {
 			for(channel = 0; channel < decoder->private_->frame.header.channels; channel++) {
 				memset(decoder->private_->output[channel], 0, sizeof(FLAC__int32) * decoder->private_->frame.header.blocksize);
@@ -1498,6 +2114,10 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 
 	*got_a_frame = true;
 
+	/* we wait to update fixed_block_size until here, when we're sure we've got a proper frame and hence a correct blocksize */
+	if(decoder->private_->next_fixed_block_size)
+		decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size;
+
 	/* put the latest values into the public section of the decoder instance */
 	decoder->protected_->channels = decoder->private_->frame.header.channels;
 	decoder->protected_->channel_assignment = decoder->private_->frame.header.channel_assignment;
@@ -1510,7 +2130,7 @@ FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FL
 
 	/* write it */
 	if(do_full_decode) {
-		if(decoder->private_->write_callback(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output, decoder->private_->client_data) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE)
+		if(write_audio_frame_to_client_(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE)
 			return false;
 	}
 
@@ -1526,40 +2146,48 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 	FLAC__byte crc8, raw_header[16]; /* MAGIC NUMBER based on the maximum frame header size, including CRC */
 	unsigned raw_header_len;
 	FLAC__bool is_unparseable = false;
-	const FLAC__bool is_known_variable_blocksize_stream = (decoder->private_->has_stream_info && decoder->private_->stream_info.data.stream_info.min_blocksize != decoder->private_->stream_info.data.stream_info.max_blocksize);
-	const FLAC__bool is_known_fixed_blocksize_stream = (decoder->private_->has_stream_info && decoder->private_->stream_info.data.stream_info.min_blocksize == decoder->private_->stream_info.data.stream_info.max_blocksize);
 
-	FLAC__ASSERT(FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input));
+	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
 	/* init the raw header with the saved bits from synchronization */
 	raw_header[0] = decoder->private_->header_warmup[0];
 	raw_header[1] = decoder->private_->header_warmup[1];
 	raw_header_len = 2;
 
-	/*
-	 * check to make sure that the reserved bits are 0
-	 */
-	if(raw_header[1] & 0x03) { /* MAGIC NUMBER */
+	/* check to make sure that reserved bit is 0 */
+	if(raw_header[1] & 0x02) /* MAGIC NUMBER */
 		is_unparseable = true;
-	}
 
 	/*
 	 * Note that along the way as we read the header, we look for a sync
 	 * code inside.  If we find one it would indicate that our original
 	 * sync was bad since there cannot be a sync code in a valid header.
+	 *
+	 * Three kinds of things can go wrong when reading the frame header:
+	 *  1) We may have sync'ed incorrectly and not landed on a frame header.
+	 *     If we don't find a sync code, it can end up looking like we read
+	 *     a valid but unparseable header, until getting to the frame header
+	 *     CRC.  Even then we could get a false positive on the CRC.
+	 *  2) We may have sync'ed correctly but on an unparseable frame (from a
+	 *     future encoder).
+	 *  3) We may be on a damaged frame which appears valid but unparseable.
+	 *
+	 * For all these reasons, we try and read a complete frame header as
+	 * long as it seems valid, even if unparseable, up until the frame
+	 * header CRC.
 	 */
 
 	/*
 	 * read in the raw header as bytes so we can CRC it, and parse it on the way
 	 */
 	for(i = 0; i < 2; i++) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+			return false; /* read_callback_ sets the state for us */
 		if(x == 0xff) { /* MAGIC NUMBER for the first 8 frame sync bits */
 			/* if we get here it means our original sync was erroneous since the sync code cannot appear in the header */
 			decoder->private_->lookahead = (FLAC__byte)x;
 			decoder->private_->cached = true;
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 			return true;
 		}
@@ -1568,10 +2196,7 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 
 	switch(x = raw_header[2] >> 4) {
 		case 0:
-			if(is_known_fixed_blocksize_stream)
-				decoder->private_->frame.header.blocksize = decoder->private_->stream_info.data.stream_info.min_blocksize;
-			else
-				is_unparseable = true;
+			is_unparseable = true;
 			break;
 		case 1:
 			decoder->private_->frame.header.blocksize = 192;
@@ -1609,9 +2234,13 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 				is_unparseable = true;
 			break;
 		case 1:
+			decoder->private_->frame.header.sample_rate = 88200;
+			break;
 		case 2:
+			decoder->private_->frame.header.sample_rate = 176400;
+			break;
 		case 3:
-			is_unparseable = true;
+			decoder->private_->frame.header.sample_rate = 192000;
 			break;
 		case 4:
 			decoder->private_->frame.header.sample_rate = 8000;
@@ -1643,7 +2272,7 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 			sample_rate_hint = x;
 			break;
 		case 15:
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 			return true;
 		default:
@@ -1704,78 +2333,50 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 			break;
 	}
 
-	if(raw_header[3] & 0x01) { /* this should be a zero padding bit */
-		decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
-		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
-		return true;
-	}
+	/* check to make sure that reserved bit is 0 */
+	if(raw_header[3] & 0x01) /* MAGIC NUMBER */
+		is_unparseable = true;
 
-	/*
-	 * Now we get to the regrettable consequences of not knowing for sure
-	 * whether we got a frame number or a sample number.  There are no
-	 * encoders that do variable-blocksize encoding so unless we know from
-	 * the STREAMINFO that it is variable-blocksize we will assume it is
-	 * fixed-blocksize.  The trouble comes when we have no STREAMINFO; again
-	 * we will guess that is fixed-blocksize.  Where this can go wrong: 1) a
-	 * variable-blocksize stream with no STREAMINFO; 2) a fixed-blocksize
-	 * stream that was edited such that one or more frames before or
-	 * including this one do not have the same number of samples as the
-	 * STREAMINFO's min and max blocksize.
-	 */
-	if(is_known_variable_blocksize_stream) {
-		if(blocksize_hint) {
-			if(!FLAC__bitbuffer_read_utf8_uint64(decoder->private_->input, &xx, read_callback_, decoder, raw_header, &raw_header_len))
-				return false; /* the read_callback_ sets the state for us */
-			if(xx == FLAC__U64L(0xffffffffffffffff)) { /* i.e. non-UTF8 code... */
-				decoder->private_->lookahead = raw_header[raw_header_len-1]; /* back up as much as we can */
-				decoder->private_->cached = true;
-				decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
-				decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
-				return true;
-			}
-			decoder->private_->frame.header.number_type = FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER;
-			decoder->private_->frame.header.number.sample_number = xx;
-		}
-		else
-			is_unparseable = true;
-	}
-	else {
-		if(!FLAC__bitbuffer_read_utf8_uint32(decoder->private_->input, &x, read_callback_, decoder, raw_header, &raw_header_len))
-			return false; /* the read_callback_ sets the state for us */
-		if(x == 0xffffffff) { /* i.e. non-UTF8 code... */
+	/* read the frame's starting sample number (or frame number as the case may be) */
+	if(
+		raw_header[1] & 0x01 ||
+		/*@@@ this clause is a concession to the old way of doing variable blocksize; the only known implementation is flake and can probably be removed without inconveniencing anyone */
+		(decoder->private_->has_stream_info && decoder->private_->stream_info.data.stream_info.min_blocksize != decoder->private_->stream_info.data.stream_info.max_blocksize)
+	) { /* variable blocksize */
+		if(!FLAC__bitreader_read_utf8_uint64(decoder->private_->input, &xx, raw_header, &raw_header_len))
+			return false; /* read_callback_ sets the state for us */
+		if(xx == FLAC__U64L(0xffffffffffffffff)) { /* i.e. non-UTF8 code... */
 			decoder->private_->lookahead = raw_header[raw_header_len-1]; /* back up as much as we can */
 			decoder->private_->cached = true;
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 			return true;
 		}
-		decoder->private_->last_frame_number = x;
 		decoder->private_->frame.header.number_type = FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER;
-		if(decoder->private_->has_stream_info) {
-			FLAC__ASSERT(decoder->private_->stream_info.data.stream_info.min_blocksize == decoder->private_->stream_info.data.stream_info.max_blocksize);
-			decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->stream_info.data.stream_info.min_blocksize * (FLAC__uint64)x;
-			decoder->private_->last_block_size = decoder->private_->frame.header.blocksize;
-		}
-		else if(blocksize_hint) {
-			if(decoder->private_->last_block_size)
-				decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->last_block_size * (FLAC__uint64)x;
-			else
-				is_unparseable = true;
-		}
-		else {
-			decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->frame.header.blocksize * (FLAC__uint64)x;
-			decoder->private_->last_block_size = decoder->private_->frame.header.blocksize;
+		decoder->private_->frame.header.number.sample_number = xx;
+	}
+	else { /* fixed blocksize */
+		if(!FLAC__bitreader_read_utf8_uint32(decoder->private_->input, &x, raw_header, &raw_header_len))
+			return false; /* read_callback_ sets the state for us */
+		if(x == 0xffffffff) { /* i.e. non-UTF8 code... */
+			decoder->private_->lookahead = raw_header[raw_header_len-1]; /* back up as much as we can */
+			decoder->private_->cached = true;
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+			return true;
 		}
+		decoder->private_->frame.header.number_type = FLAC__FRAME_NUMBER_TYPE_FRAME_NUMBER;
+		decoder->private_->frame.header.number.frame_number = x;
 	}
 
 	if(blocksize_hint) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+			return false; /* read_callback_ sets the state for us */
 		raw_header[raw_header_len++] = (FLAC__byte)x;
 		if(blocksize_hint == 7) {
 			FLAC__uint32 _x;
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &_x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &_x, 8))
+				return false; /* read_callback_ sets the state for us */
 			raw_header[raw_header_len++] = (FLAC__byte)_x;
 			x = (x << 8) | _x;
 		}
@@ -1783,13 +2384,13 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 	}
 
 	if(sample_rate_hint) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+			return false; /* read_callback_ sets the state for us */
 		raw_header[raw_header_len++] = (FLAC__byte)x;
 		if(sample_rate_hint != 12) {
 			FLAC__uint32 _x;
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &_x, 8, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &_x, 8))
+				return false; /* read_callback_ sets the state for us */
 			raw_header[raw_header_len++] = (FLAC__byte)_x;
 			x = (x << 8) | _x;
 		}
@@ -1802,19 +2403,45 @@ FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder)
 	}
 
 	/* read the CRC-8 byte */
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8))
+		return false; /* read_callback_ sets the state for us */
 	crc8 = (FLAC__byte)x;
 
 	if(FLAC__crc8(raw_header, raw_header_len) != crc8) {
-		decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER, decoder->private_->client_data);
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
 		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		return true;
 	}
 
+	/* calculate the sample number from the frame number if needed */
+	decoder->private_->next_fixed_block_size = 0;
+	if(decoder->private_->frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_FRAME_NUMBER) {
+		x = decoder->private_->frame.header.number.frame_number;
+		decoder->private_->frame.header.number_type = FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER;
+		if(decoder->private_->fixed_block_size)
+			decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->fixed_block_size * (FLAC__uint64)x;
+		else if(decoder->private_->has_stream_info) {
+			if(decoder->private_->stream_info.data.stream_info.min_blocksize == decoder->private_->stream_info.data.stream_info.max_blocksize) {
+				decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->stream_info.data.stream_info.min_blocksize * (FLAC__uint64)x;
+				decoder->private_->next_fixed_block_size = decoder->private_->stream_info.data.stream_info.max_blocksize;
+			}
+			else
+				is_unparseable = true;
+		}
+		else if(x == 0) {
+			decoder->private_->frame.header.number.sample_number = 0;
+			decoder->private_->next_fixed_block_size = decoder->private_->frame.header.blocksize;
+		}
+		else {
+			/* can only get here if the stream has invalid frame numbering and no STREAMINFO, so assume it's not the last (possibly short) frame */
+			decoder->private_->frame.header.number.sample_number = (FLAC__uint64)decoder->private_->frame.header.blocksize * (FLAC__uint64)x;
+		}
+	}
+
 	if(is_unparseable) {
-		decoder->protected_->state = FLAC__STREAM_DECODER_UNPARSEABLE_STREAM;
-		return false;
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM);
+		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+		return true;
 	}
 
 	return true;
@@ -1824,17 +2451,18 @@ FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsign
 {
 	FLAC__uint32 x;
 	FLAC__bool wasted_bits;
+	unsigned i;
 
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &x, 8, read_callback_, decoder)) /* MAGIC NUMBER */
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8)) /* MAGIC NUMBER */
+		return false; /* read_callback_ sets the state for us */
 
 	wasted_bits = (x & 1);
 	x &= 0xfe;
 
 	if(wasted_bits) {
 		unsigned u;
-		if(!FLAC__bitbuffer_read_unary_unsigned(decoder->private_->input, &u, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_unary_unsigned(decoder->private_->input, &u))
+			return false; /* read_callback_ sets the state for us */
 		decoder->private_->frame.subframes[channel].wasted_bits = u+1;
 		bps -= decoder->private_->frame.subframes[channel].wasted_bits;
 	}
@@ -1845,7 +2473,7 @@ FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsign
 	 * Lots of magic numbers here
 	 */
 	if(x & 0x80) {
-		decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		return true;
 	}
@@ -1858,24 +2486,29 @@ FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsign
 			return false;
 	}
 	else if(x < 16) {
-		decoder->protected_->state = FLAC__STREAM_DECODER_UNPARSEABLE_STREAM;
-		return false;
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM);
+		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+		return true;
 	}
 	else if(x <= 24) {
 		if(!read_subframe_fixed_(decoder, channel, bps, (x>>1)&7, do_full_decode))
 			return false;
+		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
+			return true;
 	}
 	else if(x < 64) {
-		decoder->protected_->state = FLAC__STREAM_DECODER_UNPARSEABLE_STREAM;
-		return false;
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM);
+		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+		return true;
 	}
 	else {
 		if(!read_subframe_lpc_(decoder, channel, bps, ((x>>1)&31)+1, do_full_decode))
 			return false;
+		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
+			return true;
 	}
 
 	if(wasted_bits && do_full_decode) {
-		unsigned i;
 		x = decoder->private_->frame.subframes[channel].wasted_bits;
 		for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
 			decoder->private_->output[channel][i] <<= x;
@@ -1893,8 +2526,8 @@ FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, unsigned channe
 
 	decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_CONSTANT;
 
-	if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &x, bps, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &x, bps))
+		return false; /* read_callback_ sets the state for us */
 
 	subframe->value = x;
 
@@ -1921,31 +2554,34 @@ FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, unsigned channel,
 
 	/* read warm-up samples */
 	for(u = 0; u < order; u++) {
-		if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &i32, bps, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, bps))
+			return false; /* read_callback_ sets the state for us */
 		subframe->warmup[u] = i32;
 	}
 
 	/* read entropy coding method info */
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_TYPE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_TYPE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	subframe->entropy_coding_method.type = (FLAC__EntropyCodingMethodType)u32;
 	switch(subframe->entropy_coding_method.type) {
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN))
+				return false; /* read_callback_ sets the state for us */
 			subframe->entropy_coding_method.data.partitioned_rice.order = u32;
 			subframe->entropy_coding_method.data.partitioned_rice.contents = &decoder->private_->partitioned_rice_contents[channel];
 			break;
 		default:
-			decoder->protected_->state = FLAC__STREAM_DECODER_UNPARSEABLE_STREAM;
-			return false;
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM);
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+			return true;
 	}
 
 	/* read residual */
 	switch(subframe->entropy_coding_method.type) {
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
-			if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel]))
+		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
+			if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel], /*is_extended=*/subframe->entropy_coding_method.type == FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2))
 				return false;
 			break;
 		default:
@@ -1975,53 +2611,56 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un
 
 	/* read warm-up samples */
 	for(u = 0; u < order; u++) {
-		if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &i32, bps, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, bps))
+			return false; /* read_callback_ sets the state for us */
 		subframe->warmup[u] = i32;
 	}
 
 	/* read qlp coeff precision */
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &u32, FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN))
+		return false; /* read_callback_ sets the state for us */
 	if(u32 == (1u << FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN) - 1) {
-		decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		return true;
 	}
 	subframe->qlp_coeff_precision = u32+1;
 
 	/* read qlp shift */
-	if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &i32, FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN))
+		return false; /* read_callback_ sets the state for us */
 	subframe->quantization_level = i32;
 
 	/* read quantized lp coefficiencts */
 	for(u = 0; u < order; u++) {
-		if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &i32, subframe->qlp_coeff_precision, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, subframe->qlp_coeff_precision))
+			return false; /* read_callback_ sets the state for us */
 		subframe->qlp_coeff[u] = i32;
 	}
 
 	/* read entropy coding method info */
-	if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_TYPE_LEN, read_callback_, decoder))
-		return false; /* the read_callback_ sets the state for us */
+	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_TYPE_LEN))
+		return false; /* read_callback_ sets the state for us */
 	subframe->entropy_coding_method.type = (FLAC__EntropyCodingMethodType)u32;
 	switch(subframe->entropy_coding_method.type) {
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN))
+				return false; /* read_callback_ sets the state for us */
 			subframe->entropy_coding_method.data.partitioned_rice.order = u32;
 			subframe->entropy_coding_method.data.partitioned_rice.contents = &decoder->private_->partitioned_rice_contents[channel];
 			break;
 		default:
-			decoder->protected_->state = FLAC__STREAM_DECODER_UNPARSEABLE_STREAM;
-			return false;
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM);
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+			return true;
 	}
 
 	/* read residual */
 	switch(subframe->entropy_coding_method.type) {
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
-			if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel]))
+		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
+			if(!read_residual_partitioned_rice_(decoder, order, subframe->entropy_coding_method.data.partitioned_rice.order, &decoder->private_->partitioned_rice_contents[channel], decoder->private_->residual[channel], /*is_extended=*/subframe->entropy_coding_method.type == FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2))
 				return false;
 			break;
 		default:
@@ -2031,6 +2670,9 @@ FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, un
 	/* decode the subframe */
 	if(do_full_decode) {
 		memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
+		/*@@@@@@ technically not pessimistic enough, should be more like
+		if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) )
+		*/
 		if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
 			if(bps <= 16 && subframe->qlp_coeff_precision <= 16) {
 				if(order <= 8)
@@ -2058,8 +2700,8 @@ FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channe
 	subframe->data = residual;
 
 	for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
-		if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &x, bps, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &x, bps))
+			return false; /* read_callback_ sets the state for us */
 		residual[i] = x;
 	}
 
@@ -2070,25 +2712,27 @@ FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channe
 	return true;
 }
 
-FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual)
+FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended)
 {
 	FLAC__uint32 rice_parameter;
 	int i;
 	unsigned partition, sample, u;
 	const unsigned partitions = 1u << partition_order;
 	const unsigned partition_samples = partition_order > 0? decoder->private_->frame.header.blocksize >> partition_order : decoder->private_->frame.header.blocksize - predictor_order;
+	const unsigned plen = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
+	const unsigned pesc = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 
 	/* sanity checks */
 	if(partition_order == 0) {
 		if(decoder->private_->frame.header.blocksize < predictor_order) {
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 			return true;
 		}
 	}
 	else {
 		if(partition_samples < predictor_order) {
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 			return true;
 		}
@@ -2101,22 +2745,23 @@ FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigne
 
 	sample = 0;
 	for(partition = 0; partition < partitions; partition++) {
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN, read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &rice_parameter, plen))
+			return false; /* read_callback_ sets the state for us */
 		partitioned_rice_contents->parameters[partition] = rice_parameter;
-		if(rice_parameter < FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER) {
+		if(rice_parameter < pesc) {
+			partitioned_rice_contents->raw_bits[partition] = 0;
 			u = (partition_order == 0 || partition > 0)? partition_samples : partition_samples - predictor_order;
-			if(!FLAC__bitbuffer_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!decoder->private_->local_bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
+				return false; /* read_callback_ sets the state for us */
 			sample += u;
 		}
 		else {
-			if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN, read_callback_, decoder))
-				return false; /* the read_callback_ sets the state for us */
+			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN))
+				return false; /* read_callback_ sets the state for us */
 			partitioned_rice_contents->raw_bits[partition] = rice_parameter;
 			for(u = (partition_order == 0 || partition > 0)? 0 : predictor_order; u < partition_samples; u++, sample++) {
-				if(!FLAC__bitbuffer_read_raw_int32(decoder->private_->input, &i, rice_parameter, read_callback_, decoder))
-					return false; /* the read_callback_ sets the state for us */
+				if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i, rice_parameter))
+					return false; /* read_callback_ sets the state for us */
 				residual[sample] = i;
 			}
 		}
@@ -2127,27 +2772,617 @@ FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigne
 
 FLAC__bool read_zero_padding_(FLAC__StreamDecoder *decoder)
 {
-	if(!FLAC__bitbuffer_is_consumed_byte_aligned(decoder->private_->input)) {
+	if(!FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input)) {
 		FLAC__uint32 zero = 0;
-		if(!FLAC__bitbuffer_read_raw_uint32(decoder->private_->input, &zero, FLAC__bitbuffer_bits_left_for_byte_alignment(decoder->private_->input), read_callback_, decoder))
-			return false; /* the read_callback_ sets the state for us */
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &zero, FLAC__bitreader_bits_left_for_byte_alignment(decoder->private_->input)))
+			return false; /* read_callback_ sets the state for us */
 		if(zero != 0) {
-			decoder->private_->error_callback(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC, decoder->private_->client_data);
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		}
 	}
 	return true;
 }
 
-FLAC__bool read_callback_(FLAC__byte buffer[], unsigned *bytes, void *client_data)
+FLAC__bool read_callback_(FLAC__byte buffer[], size_t *bytes, void *client_data)
 {
 	FLAC__StreamDecoder *decoder = (FLAC__StreamDecoder *)client_data;
-	FLAC__StreamDecoderReadStatus status;
 
-	status = decoder->private_->read_callback(decoder, buffer, bytes, decoder->private_->client_data);
-	if(status == FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM)
+	if(
+#if FLAC__HAS_OGG
+		/* see [1] HACK NOTE below for why we don't call the eof_callback when decoding Ogg FLAC */
+		!decoder->private_->is_ogg &&
+#endif
+		decoder->private_->eof_callback && decoder->private_->eof_callback(decoder, decoder->private_->client_data)
+	) {
+		*bytes = 0;
 		decoder->protected_->state = FLAC__STREAM_DECODER_END_OF_STREAM;
-	else if(status == FLAC__STREAM_DECODER_READ_STATUS_ABORT)
+		return false;
+	}
+	else if(*bytes > 0) {
+		/* While seeking, it is possible for our seek to land in the
+		 * middle of audio data that looks exactly like a frame header
+		 * from a future version of an encoder.  When that happens,
+		 * send_error_to_client_() is handed
+		 * FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM and increments
+		 * unparseable_frame_count.  But there is a remote possibility
+		 * that it is properly synced at such a "future-codec frame",
+		 * so to make sure, we wait to see many "unparseable" errors in
+		 * a row before bailing out.
+		 */
+		if(decoder->private_->is_seeking && decoder->private_->unparseable_frame_count > 20) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED;
+			return false;
+		}
+		else {
+			const FLAC__StreamDecoderReadStatus status =
+#if FLAC__HAS_OGG
+				decoder->private_->is_ogg?
+				read_callback_ogg_aspect_(decoder, buffer, bytes) :
+#endif
+				decoder->private_->read_callback(decoder, buffer, bytes, decoder->private_->client_data)
+			;
+			if(status == FLAC__STREAM_DECODER_READ_STATUS_ABORT) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED;
+				return false;
+			}
+			else if(*bytes == 0) {
+				if(
+					status == FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM ||
+					(
+#if FLAC__HAS_OGG
+						/* see [1] HACK NOTE below for why we don't call the eof_callback when decoding Ogg FLAC */
+						!decoder->private_->is_ogg &&
+#endif
+						decoder->private_->eof_callback && decoder->private_->eof_callback(decoder, decoder->private_->client_data)
+					)
+				) {
+					decoder->protected_->state = FLAC__STREAM_DECODER_END_OF_STREAM;
+					return false;
+				}
+				else
+					return true;
+			}
+			else
+				return true;
+		}
+	}
+	else {
+		/* abort to avoid a deadlock */
 		decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED;
-	return status == FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+		return false;
+	}
+	/* [1] @@@ HACK NOTE: The end-of-stream checking has to be hacked around
+	 * for Ogg FLAC.  This is because the ogg decoder aspect can lose sync
+	 * and at the same time hit the end of the stream (for example, seeking
+	 * to a point that is after the beginning of the last Ogg page).  There
+	 * is no way to report an Ogg sync loss through the callbacks (see note
+	 * in read_callback_ogg_aspect_()) so it returns CONTINUE with *bytes==0.
+	 * So to keep the decoder from stopping at this point we gate the call
+	 * to the eof_callback and let the Ogg decoder aspect set the
+	 * end-of-stream state when it is needed.
+	 */
+}
+
+#if FLAC__HAS_OGG
+FLAC__StreamDecoderReadStatus read_callback_ogg_aspect_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes)
+{ /* Calls the Ogg decoder aspect's read wrapper (passing read_callback_proxy_ as its callback) and maps the aspect status onto a FLAC__StreamDecoderReadStatus. */
+	switch(FLAC__ogg_decoder_aspect_read_callback_wrapper(&decoder->protected_->ogg_decoder_aspect, buffer, bytes, read_callback_proxy_, decoder, decoder->private_->client_data)) {
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_OK:
+			return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+		/* we don't really have a way to handle lost sync via read
+		 * callback so we'll let it pass and let the underlying
+		 * FLAC decoder catch the error
+		 */
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_LOST_SYNC:
+			return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_END_OF_STREAM:
+			return FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_NOT_FLAC: /* this and the four statuses below all collapse to ABORT */
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_UNSUPPORTED_MAPPING_VERSION:
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_ABORT:
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_ERROR:
+		case FLAC__OGG_DECODER_ASPECT_READ_STATUS_MEMORY_ALLOCATION_ERROR:
+			return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
+		default:
+			FLAC__ASSERT(0);
+			/* double protection: still abort if the assertion does not stop us (presumably when assertions are compiled out) */
+			return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
+	}
+}
+
+FLAC__OggDecoderAspectReadStatus read_callback_proxy_(const void *void_decoder, FLAC__byte buffer[], size_t *bytes, void *client_data)
+{ /* Adapter handed to the Ogg decoder aspect: invokes the client's read callback and translates its status into a FLAC__OggDecoderAspectReadStatus. */
+	FLAC__StreamDecoder *decoder = (FLAC__StreamDecoder*)void_decoder; /* recover the decoder from the opaque handle (the cast also drops const) */
+
+	switch(decoder->private_->read_callback(decoder, buffer, bytes, client_data)) {
+		case FLAC__STREAM_DECODER_READ_STATUS_CONTINUE:
+			return FLAC__OGG_DECODER_ASPECT_READ_STATUS_OK;
+		case FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM:
+			return FLAC__OGG_DECODER_ASPECT_READ_STATUS_END_OF_STREAM;
+		case FLAC__STREAM_DECODER_READ_STATUS_ABORT:
+			return FLAC__OGG_DECODER_ASPECT_READ_STATUS_ABORT;
+		default:
+			/* double protection: an unknown status from the client is treated as an abort */
+			FLAC__ASSERT(0);
+			return FLAC__OGG_DECODER_ASPECT_READ_STATUS_ABORT;
+	}
+}
+#endif
+
+FLAC__StreamDecoderWriteStatus write_audio_frame_to_client_(FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[])
+{ /* Hands a decoded frame to the client's write callback.  While seeking, frames are swallowed until the one containing target_sample arrives; that frame is trimmed to start at target_sample.  Otherwise the frame is (optionally) MD5-accumulated and passed through. */
+	if(decoder->private_->is_seeking) {
+		FLAC__uint64 this_frame_sample = frame->header.number.sample_number; /* first sample of this frame */
+		FLAC__uint64 next_frame_sample = this_frame_sample + (FLAC__uint64)frame->header.blocksize; /* one past the last sample of this frame */
+		FLAC__uint64 target_sample = decoder->private_->target_sample;
+
+		FLAC__ASSERT(frame->header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+
+#if FLAC__HAS_OGG
+		decoder->private_->got_a_frame = true;
+#endif
+		decoder->private_->last_frame = *frame; /* save the frame; seek_to_absolute_sample_() reads last_frame to narrow its search */
+		if(this_frame_sample <= target_sample && target_sample < next_frame_sample) { /* we hit our target frame */
+			unsigned delta = (unsigned)(target_sample - this_frame_sample); /* number of leading samples to drop */
+			/* kick out of seek mode */
+			decoder->private_->is_seeking = false;
+			/* shift out the samples before target_sample */
+			if(delta > 0) {
+				unsigned channel;
+				const FLAC__int32 *newbuffer[FLAC__MAX_CHANNELS];
+				for(channel = 0; channel < frame->header.channels; channel++)
+					newbuffer[channel] = buffer[channel] + delta; /* point each channel past the dropped samples */
+				decoder->private_->last_frame.header.blocksize -= delta; /* the saved copy is adjusted to describe the trimmed frame */
+				decoder->private_->last_frame.header.number.sample_number += (FLAC__uint64)delta;
+				/* write the relevant samples */
+				return decoder->private_->write_callback(decoder, &decoder->private_->last_frame, newbuffer, decoder->private_->client_data);
+			}
+			else {
+				/* write the relevant samples */
+				return decoder->private_->write_callback(decoder, frame, buffer, decoder->private_->client_data);
+			}
+		}
+		else {
+			return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE; /* not the target frame: nothing is delivered to the client */
+		}
+	}
+	else {
+		/*
+		 * If we never got STREAMINFO, turn off MD5 checking to save
+		 * cycles since we don't have a sum to compare to anyway
+		 */
+		if(!decoder->private_->has_stream_info)
+			decoder->private_->do_md5_checking = false;
+		if(decoder->private_->do_md5_checking) {
+			if(!FLAC__MD5Accumulate(&decoder->private_->md5context, buffer, frame->header.channels, frame->header.blocksize, (frame->header.bits_per_sample+7) / 8)) /* last argument: bits per sample rounded up to whole bytes */
+				return FLAC__STREAM_DECODER_WRITE_STATUS_ABORT;
+		}
+		return decoder->private_->write_callback(decoder, frame, buffer, decoder->private_->client_data);
+	}
+}
+
+void send_error_to_client_(const FLAC__StreamDecoder *decoder, FLAC__StreamDecoderErrorStatus status)
+{ /* Reports 'status' through the client's error callback, except while seeking: then the callback is not invoked and only unparseable-stream errors are counted. */
+	if(!decoder->private_->is_seeking)
+		decoder->private_->error_callback(decoder, status, decoder->private_->client_data);
+	else if(status == FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM)
+		decoder->private_->unparseable_frame_count++; /* read_callback_() aborts the seek once this count exceeds 20 */
+}
+
+FLAC__bool seek_to_absolute_sample_(FLAC__StreamDecoder *decoder, FLAC__uint64 stream_length, FLAC__uint64 target_sample)
+{
+	FLAC__uint64 first_frame_offset = decoder->private_->first_frame_offset, lower_bound, upper_bound, lower_bound_sample, upper_bound_sample, this_frame_sample;
+	FLAC__int64 pos = -1;
+	int i;
+	unsigned approx_bytes_per_frame;
+	FLAC__bool first_seek = true;
+	const FLAC__uint64 total_samples = FLAC__stream_decoder_get_total_samples(decoder);
+	const unsigned min_blocksize = decoder->private_->stream_info.data.stream_info.min_blocksize;
+	const unsigned max_blocksize = decoder->private_->stream_info.data.stream_info.max_blocksize;
+	const unsigned max_framesize = decoder->private_->stream_info.data.stream_info.max_framesize;
+	const unsigned min_framesize = decoder->private_->stream_info.data.stream_info.min_framesize;
+	/* take these from the current frame in case they've changed mid-stream */
+	unsigned channels = FLAC__stream_decoder_get_channels(decoder);
+	unsigned bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
+	const FLAC__StreamMetadata_SeekTable *seek_table = decoder->private_->has_seek_table? &decoder->private_->seek_table.data.seek_table : 0;
+
+	/* use values from stream info if we didn't decode a frame */
+	if(channels == 0)
+		channels = decoder->private_->stream_info.data.stream_info.channels;
+	if(bps == 0)
+		bps = decoder->private_->stream_info.data.stream_info.bits_per_sample;
+
+	/* we are just guessing here */
+	if(max_framesize > 0)
+		approx_bytes_per_frame = (max_framesize + min_framesize) / 2 + 1;
+	/*
+	 * Check if it's a known fixed-blocksize stream.  Note that though
+	 * the spec doesn't allow zeroes in the STREAMINFO block, we may
+	 * never get a STREAMINFO block when decoding so the value of
+	 * min_blocksize might be zero.
+	 */
+	else if(min_blocksize == max_blocksize && min_blocksize > 0) {
+		/* note there are no () around 'bps/8' to keep precision up since it's an integer calculation */
+		approx_bytes_per_frame = min_blocksize * channels * bps/8 + 64;
+	}
+	else
+		approx_bytes_per_frame = 4096 * channels * bps/8 + 64;
+
+	/*
+	 * First, we set an upper and lower bound on where in the
+	 * stream we will search.  For now we assume the worst case
+	 * scenario, which is our best guess at the beginning of
+	 * the first frame and end of the stream.
+	 */
+	lower_bound = first_frame_offset;
+	lower_bound_sample = 0;
+	upper_bound = stream_length;
+	upper_bound_sample = total_samples > 0 ? total_samples : target_sample /*estimate it*/;
+
+	/*
+	 * Now we refine the bounds if we have a seektable with
+	 * suitable points.  Note that according to the spec they
+	 * must be ordered by ascending sample number.
+	 *
+	 * Note: to protect against invalid seek tables we will ignore points
+	 * that have frame_samples==0 or sample_number>=total_samples
+	 */
+	if(seek_table) {
+		FLAC__uint64 new_lower_bound = lower_bound;
+		FLAC__uint64 new_upper_bound = upper_bound;
+		FLAC__uint64 new_lower_bound_sample = lower_bound_sample;
+		FLAC__uint64 new_upper_bound_sample = upper_bound_sample;
+
+		/* find the closest seek point <= target_sample, if it exists */
+		for(i = (int)seek_table->num_points - 1; i >= 0; i--) {
+			if(
+				seek_table->points[i].sample_number != FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER &&
+				seek_table->points[i].frame_samples > 0 && /* defense against bad seekpoints */
+				(total_samples <= 0 || seek_table->points[i].sample_number < total_samples) && /* defense against bad seekpoints */
+				seek_table->points[i].sample_number <= target_sample
+			)
+				break;
+		}
+		if(i >= 0) { /* i.e. we found a suitable seek point... */
+			new_lower_bound = first_frame_offset + seek_table->points[i].stream_offset;
+			new_lower_bound_sample = seek_table->points[i].sample_number;
+		}
+
+		/* find the closest seek point > target_sample, if it exists */
+		for(i = 0; i < (int)seek_table->num_points; i++) {
+			if(
+				seek_table->points[i].sample_number != FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER &&
+				seek_table->points[i].frame_samples > 0 && /* defense against bad seekpoints */
+				(total_samples <= 0 || seek_table->points[i].sample_number < total_samples) && /* defense against bad seekpoints */
+				seek_table->points[i].sample_number > target_sample
+			)
+				break;
+		}
+		if(i < (int)seek_table->num_points) { /* i.e. we found a suitable seek point... */
+			new_upper_bound = first_frame_offset + seek_table->points[i].stream_offset;
+			new_upper_bound_sample = seek_table->points[i].sample_number;
+		}
+		/* final protection against unsorted seek tables; keep original values if bogus */
+		if(new_upper_bound >= new_lower_bound) {
+			lower_bound = new_lower_bound;
+			upper_bound = new_upper_bound;
+			lower_bound_sample = new_lower_bound_sample;
+			upper_bound_sample = new_upper_bound_sample;
+		}
+	}
+
+	FLAC__ASSERT(upper_bound_sample >= lower_bound_sample);
+	/* there are 2 insidious ways that the following equality occurs, which
+	 * we need to fix:
+	 *  1) total_samples is 0 (unknown) and target_sample is 0
+	 *  2) total_samples is 0 (unknown) and target_sample happens to be
+	 *     exactly equal to the last seek point in the seek table; this
+	 *     means there is no seek point above it, and upper_bound_sample
+	 *     remains equal to the estimate (target_sample) we made above
+	 * in either case it does not hurt to move upper_bound_sample up by 1
+	 */
+	if(upper_bound_sample == lower_bound_sample)
+		upper_bound_sample++;
+
+	decoder->private_->target_sample = target_sample;
+	while(1) {
+		/* check if the bounds are still ok */
+		if (lower_bound_sample >= upper_bound_sample || lower_bound > upper_bound) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#if defined _MSC_VER || defined __MINGW32__
+		/* with VC++ you have to spoon feed it the casting */
+		pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(FLAC__int64)(target_sample - lower_bound_sample) / (FLAC__double)(FLAC__int64)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(FLAC__int64)(upper_bound - lower_bound)) - approx_bytes_per_frame;
+#else
+		pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(target_sample - lower_bound_sample) / (FLAC__double)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(upper_bound - lower_bound)) - approx_bytes_per_frame;
+#endif
+#else
+		/* a little less accurate: */
+		if(upper_bound - lower_bound < 0xffffffff)
+			pos = (FLAC__int64)lower_bound + (FLAC__int64)(((target_sample - lower_bound_sample) * (upper_bound - lower_bound)) / (upper_bound_sample - lower_bound_sample)) - approx_bytes_per_frame;
+		else /* @@@ WATCHOUT, ~2TB limit */
+			pos = (FLAC__int64)lower_bound + (FLAC__int64)((((target_sample - lower_bound_sample)>>8) * ((upper_bound - lower_bound)>>8)) / ((upper_bound_sample - lower_bound_sample)>>16)) - approx_bytes_per_frame;
+#endif
+		if(pos >= (FLAC__int64)upper_bound)
+			pos = (FLAC__int64)upper_bound - 1;
+		if(pos < (FLAC__int64)lower_bound)
+			pos = (FLAC__int64)lower_bound;
+		if(decoder->private_->seek_callback(decoder, (FLAC__uint64)pos, decoder->private_->client_data) != FLAC__STREAM_DECODER_SEEK_STATUS_OK) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
+		if(!FLAC__stream_decoder_flush(decoder)) {
+			/* above call sets the state for us */
+			return false;
+		}
+		/* Now we need to get a frame.  First we need to reset our
+		 * unparseable_frame_count; if we get too many unparseable
+		 * frames in a row, read_callback_() sets the state to
+		 * FLAC__STREAM_DECODER_ABORTED and returns false, causing
+		 * FLAC__stream_decoder_process_single() to return false.
+		 */
+		decoder->private_->unparseable_frame_count = 0;
+		if(!FLAC__stream_decoder_process_single(decoder)) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
+		/* our write callback will change the state when it gets to the target frame */
+		/* actually, we could have got_a_frame if our decoder is at FLAC__STREAM_DECODER_END_OF_STREAM so we need to check for that also */
+#if 0
+		/*@@@@@@ used to be the following; not clear if the check for end of stream is needed anymore */
+		if(decoder->protected_->state != FLAC__SEEKABLE_STREAM_DECODER_SEEKING && decoder->protected_->state != FLAC__STREAM_DECODER_END_OF_STREAM)
+			break;
+#endif
+		if(!decoder->private_->is_seeking)
+			break;
+
+		FLAC__ASSERT(decoder->private_->last_frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+		this_frame_sample = decoder->private_->last_frame.header.number.sample_number;
+
+		if (0 == decoder->private_->samples_decoded || (this_frame_sample + decoder->private_->last_frame.header.blocksize >= upper_bound_sample && !first_seek)) {
+			if (pos == (FLAC__int64)lower_bound) {
+				/* can't move back any more than the first frame, something is fatally wrong */
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
+			/* our last move backwards wasn't big enough, try again */
+			approx_bytes_per_frame = approx_bytes_per_frame? approx_bytes_per_frame * 2 : 16;
+			continue;	
+		}
+		/* allow one seek over upper bound, so we can get a correct upper_bound_sample for streams with unknown total_samples */
+		first_seek = false;
+		
+		/* make sure we are not seeking in corrupted stream */
+		if (this_frame_sample < lower_bound_sample) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
+
+		/* we need to narrow the search */
+		if(target_sample < this_frame_sample) {
+			upper_bound_sample = this_frame_sample + decoder->private_->last_frame.header.blocksize;
+/*@@@@@@ what will decode position be if at end of stream? */
+			if(!FLAC__stream_decoder_get_decode_position(decoder, &upper_bound)) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
+			approx_bytes_per_frame = (unsigned)(2 * (upper_bound - pos) / 3 + 16);
+		}
+		else { /* target_sample >= this_frame_sample + this frame's blocksize */
+			lower_bound_sample = this_frame_sample + decoder->private_->last_frame.header.blocksize;
+			if(!FLAC__stream_decoder_get_decode_position(decoder, &lower_bound)) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
+			approx_bytes_per_frame = (unsigned)(2 * (lower_bound - pos) / 3 + 16);
+		}
+	}
+
+	return true;
+}
+
+#if FLAC__HAS_OGG
+FLAC__bool seek_to_absolute_sample_ogg_(FLAC__StreamDecoder *decoder, FLAC__uint64 stream_length, FLAC__uint64 target_sample)
+{
+	FLAC__uint64 left_pos = 0, right_pos = stream_length;
+	FLAC__uint64 left_sample = 0, right_sample = FLAC__stream_decoder_get_total_samples(decoder);
+	FLAC__uint64 this_frame_sample = (FLAC__uint64)0 - 1;
+	FLAC__uint64 pos = 0; /* only initialized to avoid compiler warning */
+	FLAC__bool did_a_seek;
+	unsigned iteration = 0;
+	/* Returns true once is_seeking has been cleared (see the loop); every failure path returns false, setting FLAC__STREAM_DECODER_SEEK_ERROR itself except when the flush call fails. */
+	/* Overview: each pass optionally seeks and flushes, then decodes one
+	 * frame and narrows [left_pos, right_pos].  The first passes derive
+	 * the byte position from where target_sample lies between left_sample
+	 * and right_sample (let's call it "proportional search").  After that,
+	 * we will switch to binary search.  Pass count before the switch:
+	 */
+	unsigned BINARY_SEARCH_AFTER_ITERATION = 2;
+
+	/* We stop seeking and just decode forward (linear search) once the last
+	 * frame starts at or before the target sample and is no more than this
+	 * number of samples away from it.
+	 */
+	static const FLAC__uint64 LINEAR_SEARCH_WITHIN_SAMPLES = FLAC__MAX_BLOCK_SIZE * 2;
+
+	/* If the total number of samples is unknown, use a large value, and
+	 * force binary search immediately.
+	 */
+	if(right_sample == 0) {
+		right_sample = (FLAC__uint64)(-1);
+		BINARY_SEARCH_AFTER_ITERATION = 0;
+	}
+
+	decoder->private_->target_sample = target_sample;
+	for( ; ; iteration++) {
+		if (iteration == 0 || this_frame_sample > target_sample || target_sample - this_frame_sample > LINEAR_SEARCH_WITHIN_SAMPLES) {
+			if (iteration >= BINARY_SEARCH_AFTER_ITERATION) {
+				pos = (right_pos + left_pos) / 2;
+			}
+			else {
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#if defined _MSC_VER || defined __MINGW32__
+				/* with MSVC you have to spoon feed it the casting */
+				pos = (FLAC__uint64)((FLAC__double)(FLAC__int64)(target_sample - left_sample) / (FLAC__double)(FLAC__int64)(right_sample - left_sample) * (FLAC__double)(FLAC__int64)(right_pos - left_pos));
+#else
+				pos = (FLAC__uint64)((FLAC__double)(target_sample - left_sample) / (FLAC__double)(right_sample - left_sample) * (FLAC__double)(right_pos - left_pos));
+#endif
+#else
+				/* a little less accurate: */
+				if ((target_sample-left_sample <= 0xffffffff) && (right_pos-left_pos <= 0xffffffff))
+					pos = (FLAC__int64)(((target_sample-left_sample) * (right_pos-left_pos)) / (right_sample-left_sample));
+				else /* @@@ WATCHOUT, ~2TB limit */
+					pos = (FLAC__int64)((((target_sample-left_sample)>>8) * ((right_pos-left_pos)>>8)) / ((right_sample-left_sample)>>16));
+#endif
+				/* @@@ TODO: might want to limit pos to some distance before
+				 * EOF, to make sure we land before the last frame, thereby
+				 * getting a this_frame_sample and so having a better estimate.
+				 * NOTE(review): the estimate is scaled by (right_pos - left_pos) but left_pos is
+				 * not added back; same result while left_pos == 0 (iteration 0) -- confirm intent.
+				 */
+			}
+
+			/* physical seek */
+			if(decoder->private_->seek_callback((FLAC__StreamDecoder*)decoder, (FLAC__uint64)pos, decoder->private_->client_data) != FLAC__STREAM_DECODER_SEEK_STATUS_OK) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
+			if(!FLAC__stream_decoder_flush(decoder)) {
+				/* above call sets the state for us */
+				return false;
+			}
+			did_a_seek = true;
+		}
+		else
+			did_a_seek = false;
+
+		decoder->private_->got_a_frame = false; /* reset here; re-checked right after process_single() */
+		if(!FLAC__stream_decoder_process_single(decoder)) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
+		if(!decoder->private_->got_a_frame) {
+			if(did_a_seek) {
+				/* this can happen if we seek to a point after the last frame; we drop
+				 * to binary search right away in this case to avoid any wasted
+				 * iterations of proportional search.
+				 */
+				right_pos = pos;
+				BINARY_SEARCH_AFTER_ITERATION = 0;
+			}
+			else {
+				/* this can probably only happen if total_samples is unknown and the
+				 * target_sample is past the end of the stream
+				 */
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
+		}
+		/* our write callback will change the state when it gets to the target frame */
+		else if(!decoder->private_->is_seeking) {
+			break;
+		}
+		else {
+			this_frame_sample = decoder->private_->last_frame.header.number.sample_number;
+			FLAC__ASSERT(decoder->private_->last_frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+
+			if (did_a_seek) { /* bounds are only tightened after a physical seek, not while decoding forward */
+				if (this_frame_sample <= target_sample) {
+					/* The 'equal' case should not happen, since
+					 * FLAC__stream_decoder_process_single()
+					 * should recognize that it has hit the
+					 * target sample and we would exit through
+					 * the 'break' above.
+					 */
+					FLAC__ASSERT(this_frame_sample != target_sample);
+
+					left_sample = this_frame_sample;
+					/* sanity check to avoid infinite loop */
+					if (left_pos == pos) {
+						decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+						return false;
+					}
+					left_pos = pos;
+				}
+				else if(this_frame_sample > target_sample) {
+					right_sample = this_frame_sample;
+					/* sanity check to avoid infinite loop */
+					if (right_pos == pos) {
+						decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+						return false;
+					}
+					right_pos = pos;
+				}
+			}
+		}
+	}
+
+	return true;
+}
+#endif
+
+FLAC__StreamDecoderReadStatus file_read_callback_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data)
+{
+	/* Read callback for FILE*-backed decoders.  On entry *bytes is the number
+	 * of bytes wanted; after a read it holds the number fread() delivered.
+	 * client_data is unused: the FILE* is taken from the decoder itself.
+	 */
+	(void)client_data;
+
+	if(*bytes == 0)
+		return FLAC__STREAM_DECODER_READ_STATUS_ABORT; /* abort to avoid a deadlock */
+
+	*bytes = fread(buffer, sizeof(FLAC__byte), *bytes, decoder->private_->file);
+	if(ferror(decoder->private_->file))
+		return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
+	return (*bytes == 0)? FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM : FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
+}
+
+FLAC__StreamDecoderSeekStatus file_seek_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data)
+{
+	/* Seek callback for FILE*-backed decoders: moves the decoder's file to an absolute byte offset. */
+	FILE *const file = decoder->private_->file;
+	(void)client_data;
+	if(file == stdin) /* seeking on stdin is reported as unsupported */
+		return FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED;
+	if(fseeko(file, (off_t)absolute_byte_offset, SEEK_SET) < 0)
+		return FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
+	return FLAC__STREAM_DECODER_SEEK_STATUS_OK;
+}
+
+FLAC__StreamDecoderTellStatus file_tell_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data)
+{
+	/* Tell callback for FILE*-backed decoders: reports the file's current byte position. */
+	FILE *const file = decoder->private_->file;
+	off_t offset;
+	(void)client_data;
+	if(file == stdin) /* position queries on stdin are reported as unsupported */
+		return FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED;
+	offset = ftello(file);
+	if(offset < 0)
+		return FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
+	*absolute_byte_offset = (FLAC__uint64)offset; /* written only on success */
+	return FLAC__STREAM_DECODER_TELL_STATUS_OK;
+}
+
+FLAC__StreamDecoderLengthStatus file_length_callback_(const FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data)
+{
+	/* Length callback for FILE*-backed decoders: reports the st_size that fstat() returns for the file. */
+	FILE *const file = decoder->private_->file;
+	struct stat info;
+	(void)client_data;
+
+	if(file == stdin) /* length queries on stdin are reported as unsupported */
+		return FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED;
+	if(fstat(fileno(file), &info) != 0)
+		return FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
+	*stream_length = (FLAC__uint64)info.st_size; /* written only on success */
+	return FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
+}
+
+FLAC__bool file_eof_callback_(const FLAC__StreamDecoder *decoder, void *client_data)
+{
+	/* EOF callback for FILE*-backed decoders: true once feof() reports end-of-file on the decoder's file. */
+	(void)client_data;
+	return (FLAC__bool)(feof(decoder->private_->file) != 0);
 }
diff --git a/FLAC/stream_decoder_pp.cpp b/FLAC/stream_decoder_pp.cpp
index 10e15e0fac..8b76c95f54 100644
--- a/FLAC/stream_decoder_pp.cpp
+++ b/FLAC/stream_decoder_pp.cpp
@@ -1,5 +1,5 @@
 /* libFLAC++ - Free Lossless Audio Codec library
- * Copyright (C) 2002,2003,2004,2005  Josh Coalson
+ * Copyright (C) 2002,2003,2004,2005,2006,2007  Josh Coalson
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -40,6 +40,12 @@
 namespace FLAC {
 	namespace Decoder {
 
+		// ------------------------------------------------------------
+		//
+		// Stream
+		//
+		// ------------------------------------------------------------
+
 		Stream::Stream():
 		decoder_(::FLAC__stream_decoder_new())
 		{ }
@@ -47,7 +53,7 @@ namespace FLAC {
 		Stream::~Stream()
 		{
 			if(0 != decoder_) {
-				::FLAC__stream_decoder_finish(decoder_);
+				(void)::FLAC__stream_decoder_finish(decoder_);
 				::FLAC__stream_decoder_delete(decoder_);
 			}
 		}
@@ -57,6 +63,18 @@ namespace FLAC {
 			return 0 != decoder_;
 		}
 
+		bool Stream::set_ogg_serial_number(long value)
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return 0 != ::FLAC__stream_decoder_set_ogg_serial_number(decoder_, value); // FLAC__bool -> bool
+		}
+
+		bool Stream::set_md5_checking(bool value)
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return 0 != ::FLAC__stream_decoder_set_md5_checking(decoder_, value); // FLAC__bool -> bool
+		}
+
 		bool Stream::set_metadata_respond(::FLAC__MetadataType type)
 		{
 			FLAC__ASSERT(is_valid());
@@ -99,6 +117,18 @@ namespace FLAC {
 			return State(::FLAC__stream_decoder_get_state(decoder_));
 		}
 
+		bool Stream::get_md5_checking() const
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return 0 != ::FLAC__stream_decoder_get_md5_checking(decoder_); // FLAC__bool -> bool
+		}
+
+		FLAC__uint64 Stream::get_total_samples() const
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return ::FLAC__stream_decoder_get_total_samples(decoder_); // pass-through: the C API's value is returned unchanged
+		}
+
 		unsigned Stream::get_channels() const
 		{
 			FLAC__ASSERT(is_valid());
@@ -129,21 +159,28 @@ namespace FLAC {
 			return ::FLAC__stream_decoder_get_blocksize(decoder_);
 		}
 
-		Stream::State Stream::init()
+		bool Stream::get_decode_position(FLAC__uint64 *position) const
 		{
 			FLAC__ASSERT(is_valid());
-			::FLAC__stream_decoder_set_read_callback(decoder_, read_callback_);
-			::FLAC__stream_decoder_set_write_callback(decoder_, write_callback_);
-			::FLAC__stream_decoder_set_metadata_callback(decoder_, metadata_callback_);
-			::FLAC__stream_decoder_set_error_callback(decoder_, error_callback_);
-			::FLAC__stream_decoder_set_client_data(decoder_, (void*)this);
-			return State(::FLAC__stream_decoder_init(decoder_));
+			return ::FLAC__stream_decoder_get_decode_position(decoder_, position);
 		}
 
-		void Stream::finish()
+		::FLAC__StreamDecoderInitStatus Stream::init()
 		{
 			FLAC__ASSERT(is_valid());
-			::FLAC__stream_decoder_finish(decoder_);
+			return ::FLAC__stream_decoder_init_stream(decoder_, read_callback_, seek_callback_, tell_callback_, length_callback_, eof_callback_, write_callback_, metadata_callback_, error_callback_, /*client_data=*/(void*)this);
+		}
+
+		::FLAC__StreamDecoderInitStatus Stream::init_ogg()
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return ::FLAC__stream_decoder_init_ogg_stream(decoder_, read_callback_, seek_callback_, tell_callback_, length_callback_, eof_callback_, write_callback_, metadata_callback_, error_callback_, /*client_data=*/static_cast<void *>(this)); // the static trampolines receive `this` back as client_data
+		}
+
+		bool Stream::finish()
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return 0 != ::FLAC__stream_decoder_finish(decoder_); // FLAC__bool -> bool
 		}
 
 		bool Stream::flush()
@@ -182,7 +219,41 @@ namespace FLAC {
 			return (bool)::FLAC__stream_decoder_skip_single_frame(decoder_);
 		}
 
-		::FLAC__StreamDecoderReadStatus Stream::read_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__byte buffer[], unsigned *bytes, void *client_data)
+		bool Stream::seek_absolute(FLAC__uint64 sample)
+		{
+			FLAC__ASSERT(is_valid()); // the wrapped decoder_ must exist
+			return 0 != ::FLAC__stream_decoder_seek_absolute(decoder_, sample); // FLAC__bool -> bool
+		}
+
+		::FLAC__StreamDecoderSeekStatus Stream::seek_callback(FLAC__uint64 absolute_byte_offset)
+		{
+			(void)absolute_byte_offset; // this implementation ignores the requested offset
+			return ::FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED; // seeking is always reported as unsupported here
+		}
+
+		::FLAC__StreamDecoderTellStatus Stream::tell_callback(FLAC__uint64 *absolute_byte_offset)
+		{
+			(void)absolute_byte_offset; // the output parameter is left untouched
+			return ::FLAC__STREAM_DECODER_TELL_STATUS_UNSUPPORTED; // position queries are always reported as unsupported here
+		}
+
+		::FLAC__StreamDecoderLengthStatus Stream::length_callback(FLAC__uint64 *stream_length)
+		{
+			(void)stream_length; // the output parameter is left untouched
+			return ::FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED; // length queries are always reported as unsupported here
+		}
+
+		bool Stream::eof_callback()
+		{
+			return false; // this implementation never reports end-of-stream
+		}
+
+		void Stream::metadata_callback(const ::FLAC__StreamMetadata *metadata)
+		{
+			(void)metadata; // this implementation discards the metadata block
+		}
+
+		::FLAC__StreamDecoderReadStatus Stream::read_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data)
 		{
 			(void)decoder;
 			FLAC__ASSERT(0 != client_data);
@@ -191,6 +262,42 @@ namespace FLAC {
 			return instance->read_callback(buffer, bytes);
 		}
 
+		::FLAC__StreamDecoderSeekStatus Stream::seek_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 absolute_byte_offset, void *client_data)
+		{
+			FLAC__ASSERT(0 != client_data);
+			Stream *const self = static_cast<Stream *>(client_data); // client_data is treated as the Stream instance
+			FLAC__ASSERT(0 != self);
+			(void) decoder; // unused: the call is forwarded to the instance method
+			return self->seek_callback(absolute_byte_offset);
+		}
+
+		::FLAC__StreamDecoderTellStatus Stream::tell_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 *absolute_byte_offset, void *client_data)
+		{
+			FLAC__ASSERT(0 != client_data);
+			Stream *const self = static_cast<Stream *>(client_data); // client_data is treated as the Stream instance
+			FLAC__ASSERT(0 != self);
+			(void) decoder; // unused: the call is forwarded to the instance method
+			return self->tell_callback(absolute_byte_offset);
+		}
+
+		::FLAC__StreamDecoderLengthStatus Stream::length_callback_(const ::FLAC__StreamDecoder *decoder, FLAC__uint64 *stream_length, void *client_data)
+		{
+			FLAC__ASSERT(0 != client_data);
+			Stream *const self = static_cast<Stream *>(client_data); // client_data is treated as the Stream instance
+			FLAC__ASSERT(0 != self);
+			(void) decoder; // unused: the call is forwarded to the instance method
+			return self->length_callback(stream_length);
+		}
+
+		FLAC__bool Stream::eof_callback_(const ::FLAC__StreamDecoder *decoder, void *client_data)
+		{
+			FLAC__ASSERT(0 != client_data);
+			Stream *const self = static_cast<Stream *>(client_data); // client_data is treated as the Stream instance
+			FLAC__ASSERT(0 != self);
+			(void) decoder; // unused: the call is forwarded to the instance method
+			return self->eof_callback();
+		}
+
 		::FLAC__StreamDecoderWriteStatus Stream::write_callback_(const ::FLAC__StreamDecoder *decoder, const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data)
 		{
 			(void)decoder;
@@ -218,5 +325,55 @@ namespace FLAC {
 			instance->error_callback(status);
 		}
 
+		// ------------------------------------------------------------
+		//
+		// File
+		//
+		// ------------------------------------------------------------
+
+		File::File(): // no state of its own is initialized here; construction is delegated to Stream
+			Stream()
+		{ }
+
+		File::~File()
+		{ // intentionally empty: File releases nothing itself
+		}
+
+		::FLAC__StreamDecoderInitStatus File::init(FILE *file)
+		{
+			FLAC__ASSERT(0 != decoder_); // the wrapped decoder must exist
+			return ::FLAC__stream_decoder_init_FILE(decoder_, file, write_callback_, metadata_callback_, error_callback_, /*client_data=*/static_cast<void *>(this)); // only write/metadata/error trampolines are passed
+		}
+
+		::FLAC__StreamDecoderInitStatus File::init(const char *filename)
+		{
+			FLAC__ASSERT(0 != decoder_); // the wrapped decoder must exist
+			return ::FLAC__stream_decoder_init_file(decoder_, filename, write_callback_, metadata_callback_, error_callback_, /*client_data=*/static_cast<void *>(this)); // only write/metadata/error trampolines are passed
+		}
+
+		::FLAC__StreamDecoderInitStatus File::init_ogg(FILE *file)
+		{
+			FLAC__ASSERT(0 != decoder_); // the wrapped decoder must exist
+			return ::FLAC__stream_decoder_init_ogg_FILE(decoder_, file, write_callback_, metadata_callback_, error_callback_, /*client_data=*/static_cast<void *>(this)); // only write/metadata/error trampolines are passed
+		}
+
+		::FLAC__StreamDecoderInitStatus File::init_ogg(const char *filename)
+		{
+			FLAC__ASSERT(0 != decoder_); // the wrapped decoder must exist
+			return ::FLAC__stream_decoder_init_ogg_file(decoder_, filename, write_callback_, metadata_callback_, error_callback_, /*client_data=*/static_cast<void *>(this)); // only write/metadata/error trampolines are passed
+		}
+
+		// Dummy override that satisfies the pure virtual read_callback() from
+		// Stream.  It should never run: File is initialized through the
+		// FLAC__stream_decoder_init_[ogg_]FILE() / init_[ogg_]file() calls,
+		// and those supply the read callback internally.
+		::FLAC__StreamDecoderReadStatus File::read_callback(FLAC__byte buffer[], size_t *bytes)
+		{
+			(void)buffer;
+			(void)bytes;
+			FLAC__ASSERT(false);
+			return ::FLAC__STREAM_DECODER_READ_STATUS_ABORT; // double protection
+		}
+
 	}
 }
diff --git a/Makefile.linux b/Makefile.linux
index 080057c255..50d64a2a70 100644
--- a/Makefile.linux
+++ b/Makefile.linux
@@ -14,7 +14,7 @@ ifdef GC
   CFLAGS += -ffunction-sections
   LDFLAGS += -Wl,--gc-sections
 endif
-CFLAGS += -MMD -DHAVE_FILELENGTH -D__forceinline=inline -Izlib -IFLAC `sdl-config --cflags` `pkg-config gtk+-2.0 --cflags`
+CFLAGS += -MMD -DHAVE_FILELENGTH -D__forceinline=inline -Izlib `sdl-config --cflags` `pkg-config gtk+-2.0 --cflags`
 CFLAGS += -Dstricmp=strcasecmp -Dstrnicmp=strncasecmp -DNEED_STRUPR
 LDFLAGS += -lFLAC++ -lFLAC -lz -ljpeg -lfmod `sdl-config --libs` `pkg-config gtk+-2.0 --libs`
 NASMFLAGS += -f elf -DM_TARGET_LINUX
diff --git a/docs/rh-log.txt b/docs/rh-log.txt
index 9372faecc6..57f81422d7 100644
--- a/docs/rh-log.txt
+++ b/docs/rh-log.txt
@@ -1,3 +1,8 @@
+December 5, 2007
+- Fixed: The Linux makefile should use the include files for the system FLAC
+  installation, not the bundled copies which might not match what is installed.
+- Upgraded bundled FLAC from version 1.1.2 to version 1.2.1.
+
 November 28, 2007 (Changes by Graf Zahl)
 - after looking at the most recent Vavoom update I realized that A_FastChase should not
   use the multi-purpose special2 as counter for strafing so I added a new variable
diff --git a/src/sound/sample_flac.h b/src/sound/sample_flac.h
index b4ebdf5af3..dfe16aa659 100644
--- a/src/sound/sample_flac.h
+++ b/src/sound/sample_flac.h
@@ -14,21 +14,21 @@ public:
 	FSOUND_SAMPLE *LoadSample (unsigned int samplemode);
 	BYTE *ReadSample (SDWORD *numbytes);
 
-	unsigned NumChannels, SampleBits, SampleRate, NumSamples;
+	unsigned NumChannels, SampleBits, SampleRate, NumSamples;
 
 protected:
-	virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], unsigned *bytes);
-	virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]);
-	virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata);
-	virtual void error_callback(::FLAC__StreamDecoderErrorStatus status);
-
-	void CopyToSample (size_t ofs, FLAC__int32 **buffer, size_t samples);
-
-	FWadLump File;
-	long StartPos, EndPos;
-
-	void *SBuff, *SBuff2;
-	unsigned int SLen, SLen2;
-	sfxinfo_t *Sfx;
-	bool Dest8;
+	virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte buffer[], unsigned *bytes);
+	virtual ::FLAC__StreamDecoderWriteStatus write_callback(const ::FLAC__Frame *frame, const FLAC__int32 * const buffer[]);
+	virtual void metadata_callback(const ::FLAC__StreamMetadata *metadata);
+	virtual void error_callback(::FLAC__StreamDecoderErrorStatus status);
+
+	void CopyToSample (size_t ofs, FLAC__int32 **buffer, size_t samples);
+
+	FWadLump File;
+	long StartPos, EndPos;
+
+	void *SBuff, *SBuff2;
+	unsigned int SLen, SLen2;
+	sfxinfo_t *Sfx;
+	bool Dest8;
 };
diff --git a/tools/updaterevision/updaterevision.vcproj b/tools/updaterevision/updaterevision.vcproj
index f39914dcac..202c9d042b 100644
--- a/tools/updaterevision/updaterevision.vcproj
+++ b/tools/updaterevision/updaterevision.vcproj
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="Windows-1252"?>
 <VisualStudioProject
 	ProjectType="Visual C++"
-	Version="8,00"
+	Version="8.00"
 	Name="updaterevision"
 	ProjectGUID="{6077B7D6-349F-4077-B552-3BC302EF5859}"
 	RootNamespace="updaterevision"
@@ -95,6 +95,82 @@
 				Name="VCPostBuildEventTool"
 			/>
 		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
 		<Configuration
 			Name="Release|Win32"
 			OutputDirectory="$(SolutionDir)$(ConfigurationName)"
@@ -172,82 +248,6 @@
 				Name="VCPostBuildEventTool"
 			/>
 		</Configuration>
-		<Configuration
-			Name="Debug|x64"
-			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
-			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
-			ConfigurationType="1"
-			CharacterSet="1"
-			>
-			<Tool
-				Name="VCPreBuildEventTool"
-			/>
-			<Tool
-				Name="VCCustomBuildTool"
-			/>
-			<Tool
-				Name="VCXMLDataGeneratorTool"
-			/>
-			<Tool
-				Name="VCWebServiceProxyGeneratorTool"
-			/>
-			<Tool
-				Name="VCMIDLTool"
-				TargetEnvironment="3"
-			/>
-			<Tool
-				Name="VCCLCompilerTool"
-				Optimization="0"
-				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
-				MinimalRebuild="true"
-				BasicRuntimeChecks="3"
-				RuntimeLibrary="3"
-				UsePrecompiledHeader="0"
-				WarningLevel="3"
-				Detect64BitPortabilityProblems="true"
-				DebugInformationFormat="3"
-			/>
-			<Tool
-				Name="VCManagedResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCResourceCompilerTool"
-			/>
-			<Tool
-				Name="VCPreLinkEventTool"
-			/>
-			<Tool
-				Name="VCLinkerTool"
-				LinkIncremental="2"
-				GenerateDebugInformation="true"
-				SubSystem="1"
-				TargetMachine="17"
-			/>
-			<Tool
-				Name="VCALinkTool"
-			/>
-			<Tool
-				Name="VCManifestTool"
-			/>
-			<Tool
-				Name="VCXDCMakeTool"
-			/>
-			<Tool
-				Name="VCBscMakeTool"
-			/>
-			<Tool
-				Name="VCFxCopTool"
-			/>
-			<Tool
-				Name="VCAppVerifierTool"
-			/>
-			<Tool
-				Name="VCWebDeploymentTool"
-			/>
-			<Tool
-				Name="VCPostBuildEventTool"
-			/>
-		</Configuration>
 		<Configuration
 			Name="Release|x64"
 			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"